Date: Thu, 27 Jan 2022 18:02:41 GMT From: Kyle Evans <kevans@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 848263aad129 - main - cp: fix some cases with infinite recursion Message-ID: <202201271802.20RI2frC035360@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=848263aad129c8f9de75b58a5ab9a010611b75ac commit 848263aad129c8f9de75b58a5ab9a010611b75ac Author: Kyle Evans <kevans@FreeBSD.org> AuthorDate: 2022-01-27 18:02:17 +0000 Commit: Kyle Evans <kevans@FreeBSD.org> CommitDate: 2022-01-27 18:02:17 +0000 cp: fix some cases with infinite recursion As noted in the PR, cp -R has some surprising behavior. Typically, when you `cp -R foo bar` where both foo and bar exist, foo is cleanly copied to foo/bar. When you `cp -R foo foo` (where foo clearly exists), cp(1) goes a little off the rails as it creates foo/foo, then discovers that and creates foo/foo/foo, so on and so forth, until it eventually fails. POSIX doesn't seem to disallow this behavior, but it isn't very useful. GNU cp(1) will detect the recursion and squash it, but emit a message in the process that it has done so. This change seemingly follows the GNU behavior, but it currently doesn't warn about the situation -- the author feels that the final product is about what one might expect from doing this and thus, doesn't need a warning. The author doesn't feel strongly about this. PR: 235438 Reviewed by: bapt Sponsored by: Klara, Inc. Differential Revision: https://reviews.freebsd.org/D33944 --- bin/cp/cp.c | 75 +++++++++++++++++++++++++++++++++++++++++++---- bin/cp/tests/cp_test.sh | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 5 deletions(-) diff --git a/bin/cp/cp.c b/bin/cp/cp.c index 3a23394df35d..4328b89cfd7a 100644 --- a/bin/cp/cp.c +++ b/bin/cp/cp.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include <sys/types.h> #include <sys/stat.h> +#include <assert.h> #include <err.h> #include <errno.h> #include <fts.h> @@ -91,7 +92,7 @@ volatile sig_atomic_t info; enum op { FILE_TO_FILE, FILE_TO_DIR, DIR_TO_DNE }; -static int copy(char *[], enum op, int); +static int copy(char *[], enum op, int, struct stat *); static void siginfo(int __unused); int @@ -259,18 +260,26 @@ main(int argc, char *argv[]) */ type = FILE_TO_DIR; - exit (copy(argv, type, fts_options)); + /* + * For DIR_TO_DNE, we could provide copy() with the to_stat we've + * already allocated on the stack here that isn't being used for + * anything. Not doing so, though, simplifies later logic a little bit + * as we need to skip checking root_stat on the first iteration and + * ensure that we set it with the first mkdir(). + */ + exit (copy(argv, type, fts_options, (type == DIR_TO_DNE ? NULL : + &to_stat))); } static int -copy(char *argv[], enum op type, int fts_options) +copy(char *argv[], enum op type, int fts_options, struct stat *root_stat) { - struct stat to_stat; + struct stat created_root_stat, to_stat; FTS *ftsp; FTSENT *curr; int base = 0, dne, badcp, rval; size_t nlen; - char *p, *target_mid; + char *p, *recurse_path, *target_mid; mode_t mask, mode; /* @@ -280,6 +289,7 @@ copy(char *argv[], enum op type, int fts_options) mask = ~umask(0777); umask(~mask); + recurse_path = NULL; if ((ftsp = fts_open(argv, fts_options, NULL)) == NULL) err(1, "fts_open"); for (badcp = rval = 0; errno = 0, (curr = fts_read(ftsp)) != NULL; @@ -300,6 +310,47 @@ copy(char *argv[], enum op type, int fts_options) ; } + if (curr->fts_info == FTS_D && type != FILE_TO_FILE && + root_stat != NULL && + root_stat->st_dev == curr->fts_statp->st_dev && + root_stat->st_ino == curr->fts_statp->st_ino) { + assert(recurse_path == NULL); + if (curr->fts_level > FTS_ROOTLEVEL) { + /* + * If the recursion isn't at the immediate + * level, we can just not traverse into this + * directory. + */ + fts_set(ftsp, curr, FTS_SKIP); + continue; + } else { + const char *slash; + + /* + * Grab the last path component and double it, + * to make life easier later and ensure that + * we work even with fts_level == 0 is a couple + * of components deep in fts_path. No path + * separators are fine and expected in the + * common case, though. + */ + slash = strrchr(curr->fts_path, '/'); + if (slash != NULL) + slash++; + else + slash = curr->fts_path; + if (asprintf(&recurse_path, "%s/%s", + curr->fts_path, slash) == -1) + err(1, "asprintf"); + } + } + + if (recurse_path != NULL && + strcmp(curr->fts_path, recurse_path) == 0) { + fts_set(ftsp, curr, FTS_SKIP); + continue; + } + /* * If we are in case (2) or (3) above, we need to append the * source name to the target name. @@ -448,6 +499,19 @@ copy(char *argv[], enum op type, int fts_options) if (mkdir(to.p_path, curr->fts_statp->st_mode | S_IRWXU) < 0) err(1, "%s", to.p_path); + /* + * First DNE with a NULL root_stat is the root + * path, so set root_stat. We can't really + * tell in all cases if the target path is + * within the src path, so we just stat() the + * first directory we created and use that. + */ + if (root_stat == NULL && + stat(to.p_path, &created_root_stat) == -1) { + err(1, "stat"); + } else if (root_stat == NULL) { + root_stat = &created_root_stat; + } } else if (!S_ISDIR(to_stat.st_mode)) { errno = ENOTDIR; err(1, "%s", to.p_path); @@ -493,6 +557,7 @@ copy(char *argv[], enum op type, int fts_options) if (errno) err(1, "fts_read"); fts_close(ftsp); + free(recurse_path); return (rval); } diff --git a/bin/cp/tests/cp_test.sh b/bin/cp/tests/cp_test.sh index 753e69f2d619..3220b45b4438 100755 --- a/bin/cp/tests/cp_test.sh +++ b/bin/cp/tests/cp_test.sh @@ -57,8 +57,85 @@ chrdev_body() check_size trunc 0 } +atf_test_case matching_srctgt +matching_srctgt_body() +{ + + # PR235438: `cp -R foo foo` would previously infinitely recurse and + # eventually error out. + mkdir foo + echo "qux" > foo/bar + cp foo/bar foo/zoo + + atf_check cp -R foo foo + atf_check -o inline:"qux\n" cat foo/foo/bar + atf_check -o inline:"qux\n" cat foo/foo/zoo + atf_check -e not-empty -s not-exit:0 stat foo/foo/foo +} + +atf_test_case matching_srctgt_contained +matching_srctgt_contained_body() +{ + + # Let's do the same thing, except we'll try to recursively copy foo into + # one of its subdirectories. + mkdir foo + echo "qux" > foo/bar + mkdir foo/loo + mkdir foo/moo + mkdir foo/roo + cp foo/bar foo/zoo + + atf_check cp -R foo foo/moo + atf_check -o inline:"qux\n" cat foo/moo/foo/bar + atf_check -o inline:"qux\n" cat foo/moo/foo/zoo + atf_check -e not-empty -s not-exit:0 stat foo/moo/foo/moo +} + +atf_test_case matching_srctgt_link +matching_srctgt_link_body() +{ + + mkdir foo + echo "qux" > foo/bar + cp foo/bar foo/zoo + + atf_check ln -s foo roo + atf_check cp -RH roo foo + atf_check -o inline:"qux\n" cat foo/roo/bar + atf_check -o inline:"qux\n" cat foo/roo/zoo +} + +atf_test_case matching_srctgt_nonexistent +matching_srctgt_nonexistent_body() +{ + + # We'll copy foo to a nonexistent subdirectory; ideally, we would + # skip just the directory and end up with a layout like; + # + # foo/ + # bar + # dne/ + # bar + # zoo + # zoo + # + mkdir foo + echo "qux" > foo/bar + cp foo/bar foo/zoo + + atf_check cp -R foo foo/dne + atf_check -o inline:"qux\n" cat foo/dne/bar + atf_check -o inline:"qux\n" cat foo/dne/zoo + atf_check -e not-empty -s not-exit:0 stat foo/dne/foo +} + atf_init_test_cases() { atf_add_test_case basic atf_add_test_case chrdev + atf_add_test_case matching_srctgt + atf_add_test_case matching_srctgt_contained + atf_add_test_case matching_srctgt_link + atf_add_test_case matching_srctgt_nonexistent }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202201271802.20RI2frC035360>