Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 9 Feb 2023 20:40:46 GMT
From:      =?utf-8?Q?Dag-Erling=20Sm=C3=B8rgrav?= <des@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: d6d22a4530f9 - stable/13 - cp: Add tests involving sparse files.
Message-ID:  <202302092040.319KekJ7094032@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by des:

URL: https://cgit.FreeBSD.org/src/commit/?id=d6d22a4530f9c55fc7a48deaf19d2ba7047bff1a

commit d6d22a4530f9c55fc7a48deaf19d2ba7047bff1a
Author:     Dag-Erling Smørgrav <des@FreeBSD.org>
AuthorDate: 2023-02-01 20:06:24 +0000
Commit:     Dag-Erling Smørgrav <des@FreeBSD.org>
CommitDate: 2023-02-09 20:32:56 +0000

    cp: Add tests involving sparse files.
    
    MFC after:      1 week
    Sponsored by:   Klara, Inc.
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D38290
    
    (cherry picked from commit 822fa7ae1e3e7ed277e47e6de355387e524c6ee4)
    
    cp: Simplify the common case.
    
    * The allocated buffer is only used in the fallback case, so move it
      there.  The argument for passing it in from the caller was that if
      malloc(3) were to fail, we'd want it to fail before we started
      copying anything, but firstly, it was already not in the right place
      to ensure that, and secondly, malloc(3) never fails (except in very
      contrived circumstances, such as an unreasonable RLIMIT_AS or
      RLIMIT_DATA).
    
    * Remove the mmap(2) option.  It is almost never beneficial,
      especially when the alternative is copy_file_range(2), and it adds
      needless complexity and indentation.
    
    MFC after:      1 week
    Sponsored by:   Klara, Inc.
    Reviewed by:    rmacklem, mav
    Differential Revision:  https://reviews.freebsd.org/D38291
    
    (cherry picked from commit 6c85042afcbbf4cd0fb7e7c03226c7249081e690)
    
    cp: Minor code cleanup.
    
    * Fix includes in utils.c, cf. style(9).
    * Fix type mismatch: readlink(2) returns ssize_t, not int.
    * It is not necessary to set errno to 0 as fts_read(3) already does it.
    
    MFC after:      1 week
    Sponsored by:   Klara, Inc.
    Reviewed by:    allanjude
    Differential Revision:  https://reviews.freebsd.org/D38369
    
    (cherry picked from commit cb96a0ef0040fa7968245ab203ab70a7ed2d275d)
    
    cp: Adjust the sparse file tests.
    
    * The sparsity check was ineffective: it compared the apparent size in bytes to the actual size in blocks.  Instead, write a tool that reliably detects sparseness.
    * Some of the seq commands were missing an argument.
    * Based on empirical evidence, 1 MB holes are not necessarily large enough to be preserved by the underlying filesystem.  Increase the hole size to 16 MB.
    
    MFC after:      1 week
    Sponsored by:   Klara, Inc.
    Reviewed by:    cracauer
    Differential Revision:  https://reviews.freebsd.org/D38414
    
    (cherry picked from commit 8b418c83d175fde3b1f65210509ddcf2ac248d9f)
---
 bin/cp/Makefile         |   2 +-
 bin/cp/cp.c             |   3 +-
 bin/cp/tests/Makefile   |   2 +
 bin/cp/tests/cp_test.sh |  90 ++++++++++++++++++++++++++++++++++
 bin/cp/tests/sparse.c   |  73 ++++++++++++++++++++++++++++
 bin/cp/utils.c          | 127 +++++++++++++-----------------------------------
 6 files changed, 202 insertions(+), 95 deletions(-)

diff --git a/bin/cp/Makefile b/bin/cp/Makefile
index 0c4c7b5eff53..a73ee5447769 100644
--- a/bin/cp/Makefile
+++ b/bin/cp/Makefile
@@ -6,7 +6,7 @@
 PACKAGE=runtime
 PROG=	cp
 SRCS=	cp.c utils.c
-CFLAGS+= -DVM_AND_BUFFER_CACHE_SYNCHRONIZED -D_ACL_PRIVATE
+CFLAGS+= -D_ACL_PRIVATE
 
 HAS_TESTS=
 SUBDIR.${MK_TESTS}=	tests
diff --git a/bin/cp/cp.c b/bin/cp/cp.c
index dbae4b535843..e38cd97f4369 100644
--- a/bin/cp/cp.c
+++ b/bin/cp/cp.c
@@ -311,8 +311,7 @@ copy(char *argv[], enum op type, int fts_options, struct stat *root_stat)
 	recurse_path = NULL;
 	if ((ftsp = fts_open(argv, fts_options, NULL)) == NULL)
 		err(1, "fts_open");
-	for (badcp = rval = 0; errno = 0, (curr = fts_read(ftsp)) != NULL;
-            badcp = 0) {
+	for (badcp = rval = 0; (curr = fts_read(ftsp)) != NULL; badcp = 0) {
 		switch (curr->fts_info) {
 		case FTS_NS:
 		case FTS_DNR:
diff --git a/bin/cp/tests/Makefile b/bin/cp/tests/Makefile
index faad22df713a..1e480ad706d1 100644
--- a/bin/cp/tests/Makefile
+++ b/bin/cp/tests/Makefile
@@ -3,5 +3,7 @@
 PACKAGE=	tests
 
 ATF_TESTS_SH=	cp_test
+PROGS+=		sparse
+BINDIR=		${TESTSDIR}
 
 .include <bsd.test.mk>
diff --git a/bin/cp/tests/cp_test.sh b/bin/cp/tests/cp_test.sh
index 7362168d7303..559a5ecc3c00 100755
--- a/bin/cp/tests/cp_test.sh
+++ b/bin/cp/tests/cp_test.sh
@@ -199,6 +199,91 @@ recursive_link_Lflag_body()
 	    '(' ! -L foo-mirror/foo/baz ')'
 }
 
+file_is_sparse()
+{
+	atf_check ${0%/*}/sparse "$1"
+}
+
+files_are_equal()
+{
+	atf_check test "$(stat -f "%d %i" "$1")" != "$(stat -f "%d %i" "$2")"
+	atf_check cmp "$1" "$2"
+}
+
+atf_test_case sparse_leading_hole
+sparse_leading_hole_body()
+{
+	# A 16-megabyte hole followed by one megabyte of data
+	truncate -s 16M foo
+	seq -f%015g 65536 >>foo
+	file_is_sparse foo
+
+	atf_check cp foo bar
+	files_are_equal foo bar
+	file_is_sparse bar
+}
+
+atf_test_case sparse_multiple_holes
+sparse_multiple_holes_body()
+{
+	# Three one-megabyte blocks of data preceded, separated, and
+	# followed by 16-megabyte holes
+	truncate -s 16M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 33M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 50M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 67M foo
+	file_is_sparse foo
+
+	atf_check cp foo bar
+	files_are_equal foo bar
+	file_is_sparse bar
+}
+
+atf_test_case sparse_only_hole
+sparse_only_hole_body()
+{
+	# A 16-megabyte hole
+	truncate -s 16M foo
+	file_is_sparse foo
+
+	atf_check cp foo bar
+	files_are_equal foo bar
+	file_is_sparse bar
+}
+
+atf_test_case sparse_to_dev
+sparse_to_dev_body()
+{
+	# Three one-megabyte blocks of data preceded, separated, and
+	# followed by 16-megabyte holes
+	truncate -s 16M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 33M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 50M foo
+	seq -f%015g 65536 >>foo
+	truncate -s 67M foo
+	file_is_sparse foo
+
+	atf_check -o file:foo cp foo /dev/stdout
+}
+
+atf_test_case sparse_trailing_hole
+sparse_trailing_hole_body()
+{
+	# One megabyte of data followed by a 16-megabyte hole
+	seq -f%015g 65536 >foo
+	truncate -s 17M foo
+	file_is_sparse foo
+
+	atf_check cp foo bar
+	files_are_equal foo bar
+	file_is_sparse bar
+}
+
 atf_test_case standalone_Pflag
 standalone_Pflag_body()
 {
@@ -221,5 +306,10 @@ atf_init_test_cases()
 	atf_add_test_case recursive_link_dflt
 	atf_add_test_case recursive_link_Hflag
 	atf_add_test_case recursive_link_Lflag
+	atf_add_test_case sparse_leading_hole
+	atf_add_test_case sparse_multiple_holes
+	atf_add_test_case sparse_only_hole
+	atf_add_test_case sparse_to_dev
+	atf_add_test_case sparse_trailing_hole
 	atf_add_test_case standalone_Pflag
 }
diff --git a/bin/cp/tests/sparse.c b/bin/cp/tests/sparse.c
new file mode 100644
index 000000000000..78957581a56c
--- /dev/null
+++ b/bin/cp/tests/sparse.c
@@ -0,0 +1,73 @@
+/*-
+ * Copyright (c) 2023 Klara, Inc.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+static bool verbose;
+
+/*
+ * Returns true if the file named by its argument is sparse, i.e. if
+ * seeking to SEEK_HOLE returns a different value than seeking to
+ * SEEK_END.
+ */
+static bool
+sparse(const char *filename)
+{
+	off_t hole, end;
+	int fd;
+
+	if ((fd = open(filename, O_RDONLY)) < 0 ||
+	    (hole = lseek(fd, 0, SEEK_HOLE)) < 0 ||
+	    (end = lseek(fd, 0, SEEK_END)) < 0)
+		err(1, "%s", filename);
+	close(fd);
+	if (end > hole) {
+		if (verbose)
+			printf("%s: hole at %zu\n", filename, (size_t)hole);
+		return (true);
+	}
+	return (false);
+}
+
+static void
+usage(void)
+{
+
+	fprintf(stderr, "usage: sparse [-v] file [...]\n");
+	exit(EX_USAGE);
+}
+
+int
+main(int argc, char *argv[])
+{
+	int opt, rv;
+
+	while ((opt = getopt(argc, argv, "v")) != -1) {
+		switch (opt) {
+		case 'v':
+			verbose = true;
+			break;
+		default:
+			usage();
+			break;
+		}
+	}
+	argc -= optind;
+	argv += optind;
+	if (argc == 0)
+		usage();
+	rv = EXIT_SUCCESS;
+	while (argc-- > 0)
+		if (!sparse(*argv++))
+			rv = EXIT_FAILURE;
+	exit(rv);
+}
diff --git a/bin/cp/utils.c b/bin/cp/utils.c
index 07de0495ba9e..8c1c350ff6f1 100644
--- a/bin/cp/utils.c
+++ b/bin/cp/utils.c
@@ -37,13 +37,9 @@ static char sccsid[] = "@(#)utils.c	8.3 (Berkeley) 4/1/94";
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include <sys/types.h>
-#include <sys/acl.h>
 #include <sys/param.h>
+#include <sys/acl.h>
 #include <sys/stat.h>
-#ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED
-#include <sys/mman.h>
-#endif
 
 #include <err.h>
 #include <errno.h>
@@ -75,11 +71,22 @@ __FBSDID("$FreeBSD$");
 #define BUFSIZE_SMALL (MAXPHYS)
 
 static ssize_t
-copy_fallback(int from_fd, int to_fd, char *buf, size_t bufsize)
+copy_fallback(int from_fd, int to_fd)
 {
+	static char *buf = NULL;
+	static size_t bufsize;
 	ssize_t rcount, wresid, wcount = 0;
 	char *bufp;
 
+	if (buf == NULL) {
+		if (sysconf(_SC_PHYS_PAGES) > PHYSPAGES_THRESHOLD)
+			bufsize = MIN(BUFSIZE_MAX, MAXPHYS * 8);
+		else
+			bufsize = BUFSIZE_SMALL;
+		buf = malloc(bufsize);
+		if (buf == NULL)
+			err(1, "Not enough memory");
+	}
 	rcount = read(from_fd, buf, bufsize);
 	if (rcount <= 0)
 		return (rcount);
@@ -96,16 +103,10 @@ copy_fallback(int from_fd, int to_fd, char *buf, size_t bufsize)
 int
 copy_file(const FTSENT *entp, int dne)
 {
-	static char *buf = NULL;
-	static size_t bufsize;
 	struct stat *fs;
 	ssize_t wcount;
 	off_t wtotal;
 	int ch, checkch, from_fd, rval, to_fd;
-#ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED
-	size_t wresid;
-	char *bufp, *p;
-#endif
 	int use_copy_file_range = 1;
 
 	from_fd = to_fd = -1;
@@ -174,89 +175,31 @@ copy_file(const FTSENT *entp, int dne)
 	rval = 0;
 
 	if (!lflag && !sflag) {
-		/*
-		 * Mmap and write if less than 8M (the limit is so we don't
-		 * totally trash memory on big files.  This is really a minor
-		 * hack, but it wins some CPU back.
-		 * Some filesystems, such as smbnetfs, don't support mmap,
-		 * so this is a best-effort attempt.
-		 */
-#ifdef VM_AND_BUFFER_CACHE_SYNCHRONIZED
-		if (S_ISREG(fs->st_mode) && fs->st_size > 0 &&
-		    fs->st_size <= 8 * 1024 * 1024 &&
-		    (p = mmap(NULL, (size_t)fs->st_size, PROT_READ,
-		    MAP_SHARED, from_fd, (off_t)0)) != MAP_FAILED) {
-			wtotal = 0;
-			for (bufp = p, wresid = fs->st_size; ;
-			    bufp += wcount, wresid -= (size_t)wcount) {
-				wcount = write(to_fd, bufp, wresid);
-				if (wcount <= 0)
-					break;
-				wtotal += wcount;
-				if (info) {
-					info = 0;
-					(void)fprintf(stderr,
-					    "%s -> %s %3d%%\n",
-					    entp->fts_path, to.p_path,
-					    cp_pct(wtotal, fs->st_size));
+		wtotal = 0;
+		do {
+			if (use_copy_file_range) {
+				wcount = copy_file_range(from_fd, NULL,
+				    to_fd, NULL, SSIZE_MAX, 0);
+				if (wcount < 0 && errno == EINVAL) {
+					/* Prob a non-seekable FD */
+					use_copy_file_range = 0;
 				}
-				if (wcount >= (ssize_t)wresid)
-					break;
 			}
-			if (wcount != (ssize_t)wresid) {
-				warn("%s", to.p_path);
-				rval = 1;
-			}
-			/* Some systems don't unmap on close(2). */
-			if (munmap(p, fs->st_size) < 0) {
-				warn("%s", entp->fts_path);
-				rval = 1;
-			}
-		} else
-#endif
-		{
-			if (buf == NULL) {
-				/*
-				 * Note that buf and bufsize are static. If
-				 * malloc() fails, it will fail at the start
-				 * and not copy only some files. 
-				 */ 
-				if (sysconf(_SC_PHYS_PAGES) > 
-				    PHYSPAGES_THRESHOLD)
-					bufsize = MIN(BUFSIZE_MAX, MAXPHYS * 8);
-				else
-					bufsize = BUFSIZE_SMALL;
-				buf = malloc(bufsize);
-				if (buf == NULL)
-					err(1, "Not enough memory");
+			if (!use_copy_file_range) {
+				wcount = copy_fallback(from_fd, to_fd);
 			}
-			wtotal = 0;
-			do {
-				if (use_copy_file_range) {
-					wcount = copy_file_range(from_fd, NULL,
-					    to_fd, NULL, SSIZE_MAX, 0);
-					if (wcount < 0 && errno == EINVAL) {
-						/* Prob a non-seekable FD */
-						use_copy_file_range = 0;
-					}
-				}
-				if (!use_copy_file_range) {
-					wcount = copy_fallback(from_fd, to_fd,
-					    buf, bufsize);
-				}
-				wtotal += wcount;
-				if (info) {
-					info = 0;
-					(void)fprintf(stderr,
-					    "%s -> %s %3d%%\n",
-					    entp->fts_path, to.p_path,
-					    cp_pct(wtotal, fs->st_size));
-				}
-			} while (wcount > 0);
-			if (wcount < 0) {
-				warn("%s", entp->fts_path);
-				rval = 1;
+			wtotal += wcount;
+			if (info) {
+				info = 0;
+				(void)fprintf(stderr,
+				    "%s -> %s %3d%%\n",
+				    entp->fts_path, to.p_path,
+				    cp_pct(wtotal, fs->st_size));
 			}
+		} while (wcount > 0);
+		if (wcount < 0) {
+			warn("%s", entp->fts_path);
+			rval = 1;
 		}
 	} else if (lflag) {
 		if (link(entp->fts_path, to.p_path)) {
@@ -297,7 +240,7 @@ done:
 int
 copy_link(const FTSENT *p, int exists)
 {
-	int len;
+	ssize_t len;
 	char llink[PATH_MAX];
 
 	if (exists && nflag) {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202302092040.319KekJ7094032>