Date: Sun, 24 Apr 2022 13:29:18 GMT From: Kyle Evans <kevans@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: 5237a02ba86b - stable/13 - cp: fix some cases with infinite recursion Message-ID: <202204241329.23ODTI03010487@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=5237a02ba86b5957d10d87148bbc0efb3f2f6f82 commit 5237a02ba86b5957d10d87148bbc0efb3f2f6f82 Author: Kyle Evans <kevans@FreeBSD.org> AuthorDate: 2022-01-27 18:02:17 +0000 Commit: Kyle Evans <kevans@FreeBSD.org> CommitDate: 2022-04-24 03:51:13 +0000 cp: fix some cases with infinite recursion As noted in the PR, cp -R has some surprising behavior. Typically, when you `cp -R foo bar` where both foo and bar exist, foo is cleanly copied to foo/bar. When you `cp -R foo foo` (where foo clearly exists), cp(1) goes a little off the rails as it creates foo/foo, then discovers that and creates foo/foo/foo, so on and so forth, until it eventually fails. POSIX doesn't seem to disallow this behavior, but it isn't very useful. GNU cp(1) will detect the recursion and squash it, but emit a message in the process that it has done so. This change seemingly follows the GNU behavior, but it currently doesn't warn about the situation -- the author feels that the final product is about what one might expect from doing this and thus, doesn't need a warning. The author doesn't feel strongly about this. PR: 235438 Reviewed by: bapt Sponsored by: Klara, Inc. (cherry picked from commit 848263aad129c8f9de75b58a5ab9a010611b75ac) --- bin/cp/cp.c | 75 +++++++++++++++++++++++++++++++++++++++++++---- bin/cp/tests/cp_test.sh | 77 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 5 deletions(-) diff --git a/bin/cp/cp.c b/bin/cp/cp.c index e846b0ee6dd2..f132bb940c09 100644 --- a/bin/cp/cp.c +++ b/bin/cp/cp.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include <sys/types.h> #include <sys/stat.h> +#include <assert.h> #include <err.h> #include <errno.h> #include <fts.h> @@ -91,7 +92,7 @@ volatile sig_atomic_t info; enum op { FILE_TO_FILE, FILE_TO_DIR, DIR_TO_DNE }; -static int copy(char *[], enum op, int); +static int copy(char *[], enum op, int, struct stat *); static void siginfo(int __unused); int @@ -261,7 +262,15 @@ main(int argc, char *argv[]) */ type = FILE_TO_DIR; - exit (copy(argv, type, fts_options)); + /* + * For DIR_TO_DNE, we could provide copy() with the to_stat we've + * already allocated on the stack here that isn't being used for + * anything. Not doing so, though, simplifies later logic a little bit + * as we need to skip checking root_stat on the first iteration and + * ensure that we set it with the first mkdir(). + */ + exit (copy(argv, type, fts_options, (type == DIR_TO_DNE ? NULL : + &to_stat))); } /* Does the right thing based on -R + -H/-L/-P */ @@ -281,14 +290,14 @@ copy_stat(const char *path, struct stat *sb) static int -copy(char *argv[], enum op type, int fts_options) +copy(char *argv[], enum op type, int fts_options, struct stat *root_stat) { - struct stat to_stat; + struct stat created_root_stat, to_stat; FTS *ftsp; FTSENT *curr; int base = 0, dne, badcp, rval; size_t nlen; - char *p, *target_mid; + char *p, *recurse_path, *target_mid; mode_t mask, mode; /* @@ -298,6 +307,7 @@ copy(char *argv[], enum op type, int fts_options) mask = ~umask(0777); umask(~mask); + recurse_path = NULL; if ((ftsp = fts_open(argv, fts_options, NULL)) == NULL) err(1, "fts_open"); for (badcp = rval = 0; errno = 0, (curr = fts_read(ftsp)) != NULL; @@ -318,6 +328,47 @@ copy(char *argv[], enum op type, int fts_options) ; } + if (curr->fts_info == FTS_D && type != FILE_TO_FILE && + root_stat != NULL && + root_stat->st_dev == curr->fts_statp->st_dev && + root_stat->st_ino == curr->fts_statp->st_ino) { + assert(recurse_path == NULL); + if (curr->fts_level > FTS_ROOTLEVEL) { + /* + * If the recursion isn't at the immediate + * level, we can just not traverse into this + * directory. + */ + fts_set(ftsp, curr, FTS_SKIP); + continue; + } else { + const char *slash; + + /* + * Grab the last path component and double it, + * to make life easier later and ensure that + * we work even with fts_level == 0 is a couple + * of components deep in fts_path. No path + * separators are fine and expected in the + * common case, though. + */ + slash = strrchr(curr->fts_path, '/'); + if (slash != NULL) + slash++; + else + slash = curr->fts_path; + if (asprintf(&recurse_path, "%s/%s", + curr->fts_path, slash) == -1) + err(1, "asprintf"); + } + } + + if (recurse_path != NULL && + strcmp(curr->fts_path, recurse_path) == 0) { + fts_set(ftsp, curr, FTS_SKIP); + continue; + } + /* * If we are in case (2) or (3) above, we need to append the * source name to the target name. @@ -466,6 +517,19 @@ copy(char *argv[], enum op type, int fts_options) if (mkdir(to.p_path, curr->fts_statp->st_mode | S_IRWXU) < 0) err(1, "%s", to.p_path); + /* + * First DNE with a NULL root_stat is the root + * path, so set root_stat. We can't really + * tell in all cases if the target path is + * within the src path, so we just stat() the + * first directory we created and use that. + */ + if (root_stat == NULL && + stat(to.p_path, &created_root_stat) == -1) { + err(1, "stat"); + } else if (root_stat == NULL) { + root_stat = &created_root_stat; + } } else if (!S_ISDIR(to_stat.st_mode)) { errno = ENOTDIR; err(1, "%s", to.p_path); @@ -511,6 +575,7 @@ copy(char *argv[], enum op type, int fts_options) if (errno) err(1, "fts_read"); fts_close(ftsp); + free(recurse_path); return (rval); } diff --git a/bin/cp/tests/cp_test.sh b/bin/cp/tests/cp_test.sh index eb8852a579c5..adb6ea24d9e7 100755 --- a/bin/cp/tests/cp_test.sh +++ b/bin/cp/tests/cp_test.sh @@ -72,6 +72,79 @@ chrdev_body() check_size trunc 0 } +atf_test_case matching_srctgt +matching_srctgt_body() +{ + + # PR235438: `cp -R foo foo` would previously infinitely recurse and + # eventually error out. + mkdir foo + echo "qux" > foo/bar + cp foo/bar foo/zoo + + atf_check cp -R foo foo + atf_check -o inline:"qux\n" cat foo/foo/bar + atf_check -o inline:"qux\n" cat foo/foo/zoo + atf_check -e not-empty -s not-exit:0 stat foo/foo/foo +} + +atf_test_case matching_srctgt_contained +matching_srctgt_contained_body() +{ + + # Let's do the same thing, except we'll try to recursively copy foo into + # one of its subdirectories. + mkdir foo + echo "qux" > foo/bar + mkdir foo/loo + mkdir foo/moo + mkdir foo/roo + cp foo/bar foo/zoo + + atf_check cp -R foo foo/moo + atf_check -o inline:"qux\n" cat foo/moo/foo/bar + atf_check -o inline:"qux\n" cat foo/moo/foo/zoo + atf_check -e not-empty -s not-exit:0 stat foo/moo/foo/moo +} + +atf_test_case matching_srctgt_link +matching_srctgt_link_body() +{ + + mkdir foo + echo "qux" > foo/bar + cp foo/bar foo/zoo + + atf_check ln -s foo roo + atf_check cp -RH roo foo + atf_check -o inline:"qux\n" cat foo/roo/bar + atf_check -o inline:"qux\n" cat foo/roo/zoo +} + +atf_test_case matching_srctgt_nonexistent +matching_srctgt_nonexistent_body() +{ + + # We'll copy foo to a nonexistent subdirectory; ideally, we would + # skip just the directory and end up with a layout like; + # + # foo/ + # bar + # dne/ + # bar + # zoo + # zoo + # + mkdir foo + echo "qux" > foo/bar + cp foo/bar foo/zoo + + atf_check cp -R foo foo/dne + atf_check -o inline:"qux\n" cat foo/dne/bar + atf_check -o inline:"qux\n" cat foo/dne/zoo + atf_check -e not-empty -s not-exit:0 stat foo/dne/foo +} + recursive_link_setup() { extra_cpflag=$1 @@ -132,6 +205,10 @@ atf_init_test_cases() atf_add_test_case basic atf_add_test_case basic_symlink atf_add_test_case chrdev + atf_add_test_case matching_srctgt + atf_add_test_case matching_srctgt_contained + atf_add_test_case matching_srctgt_link + atf_add_test_case matching_srctgt_nonexistent atf_add_test_case recursive_link_dflt atf_add_test_case recursive_link_Hflag atf_add_test_case recursive_link_Lflag
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202204241329.23ODTI03010487>