Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 09 Feb 2026 20:43:59 +0000
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: f7cbcb19a9ec - stable/13 - file: Add a fd flag with O_RESOLVE_BENEATH semantics
Message-ID:  <698a470f.40327.2604f8c6@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch stable/13 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=f7cbcb19a9ecf606172d6d6c472b5ba7130a398b

commit f7cbcb19a9ecf606172d6d6c472b5ba7130a398b
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2026-02-09 16:47:16 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2026-02-09 16:54:57 +0000

    file: Add a fd flag with O_RESOLVE_BENEATH semantics
    
    The O_RESOLVE_BENEATH openat(2) flag restricts name lookups such that
    they remain under the directory referenced by the dirfd.  This commit
    introduces an implicit version of the flag, FD_RESOLVE_BENEATH, stored
    in the file descriptor entry.  When the flag is set, any lookup relative
    to that fd automatically has O_RESOLVE_BENEATH semantics.  Furthermore,
    the flag is sticky, meaning that it cannot be cleared, and it is copied
    by dup() and openat().
    
    File descriptors with FD_RESOLVE_BENEATH set may not be passed to
    fchdir(2) or fchroot(2).  Various fd lookup routines are modified to
    return fd flags to the caller.
    
    This flag will be used to address a case where jails with different root
    directories and the ability to pass SCM_RIGHTS messages across the jail
    boundary can transfer directory fds in such a way as to allow a
    filesystem escape.
    
    PR:             262180
    Reviewed by:    kib
    MFC after:      3 weeks
    Differential Revision:  https://reviews.freebsd.org/D50371
    
    (cherry picked from commit f35525ff2053e026a423e852136d73ed93c95803)
---
 lib/libc/sys/fcntl.2              | 63 +++++++++++++++++++---------
 sys/compat/cloudabi/cloudabi_fd.c |  2 +-
 sys/fs/fdescfs/fdesc_vnops.c      |  4 +-
 sys/kern/kern_descrip.c           | 87 ++++++++++++++++++++++++++++-----------
 sys/kern/uipc_syscalls.c          |  2 +-
 sys/kern/vfs_acl.c                |  4 +-
 sys/kern/vfs_cache.c              | 14 +++++--
 sys/kern/vfs_extattr.c            |  8 ++--
 sys/kern/vfs_syscalls.c           | 22 +++++++---
 sys/sys/fcntl.h                   |  2 +
 sys/sys/file.h                    |  2 +-
 sys/sys/filedesc.h                |  8 +++-
 sys/sys/namei.h                   |  1 +
 13 files changed, 153 insertions(+), 66 deletions(-)

diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2
index 5de41f4c0476..fdd29c9f5d78 100644
--- a/lib/libc/sys/fcntl.2
+++ b/lib/libc/sys/fcntl.2
@@ -27,7 +27,7 @@
 .\"
 .\"     @(#)fcntl.2	8.2 (Berkeley) 1/12/94
 .\"
-.Dd December 7, 2021
+.Dd June 5, 2025
 .Dt FCNTL 2
 .Os
 .Sh NAME
@@ -82,6 +82,11 @@ associated with the new file descriptor is cleared, so the file descriptor is
 to remain open across
 .Xr execve 2
 system calls.
+.It
+The
+.Dv FD_RESOLVE_BENEATH
+flag, described below, will be set if it was set on the original
+descriptor.
 .El
 .It Dv F_DUPFD_CLOEXEC
 Like
@@ -115,29 +120,47 @@ Use
 instead of
 .Dv F_DUP2FD .
 .It Dv F_GETFD
-Get the close-on-exec flag associated with the file descriptor
-.Fa fd
-as
-.Dv FD_CLOEXEC .
-If the returned value ANDed with
-.Dv FD_CLOEXEC
-is 0,
-the file will remain open across
-.Fn exec ,
-otherwise the file will be closed upon execution of
+Get the flags associated with the file descriptor
+.Fa fd .
+The following flags are defined:
+.Bl -tag -width FD_RESOLVE_BENEATH
+.It Dv FD_CLOEXEC
+The file will be closed upon execution of
 .Fn exec
 .Fa ( arg
 is ignored).
+Otherwise, the file descriptor will remain open.
+.It Dv FD_RESOLVE_BENEATH
+All path name lookups relative to that file descriptor
+will behave as if the lookup had
+.Dv O_RESOLVE_BENEATH
+or
+.Dv AT_RESOLVE_BENEATH
+semantics.
+It is not permitted to call
+.Xr fchdir 2
+or
+.Xr fchroot 2
+on such a file descriptor.
+The
+.Dv FD_RESOLVE_BENEATH
+flag is sticky, meaning that it is preserved by
+.Xr dup 2
+and similar operations, and opening a directory with
+.Xr openat 2
+where the directory descriptor has the flag set causes the new directory
+descriptor to also have the flag set.
+.El
 .It Dv F_SETFD
-Set the close-on-exec flag associated with
-.Fa fd
-to
-.Fa arg ,
-where
-.Fa arg
-is either 0 or
-.Dv FD_CLOEXEC ,
-as described above.
+Set flags associated with
+.Fa fd .
+The available flags are
+.Dv FD_CLOEXEC
+and
+.Dv FD_RESOLVE_BENEATH .
+The
+.Dv FD_RESOLVE_BENEATH
+flag cannot be cleared once set.
 .It Dv F_GETFL
 Get descriptor status flags, as described below
 .Fa ( arg
diff --git a/sys/compat/cloudabi/cloudabi_fd.c b/sys/compat/cloudabi/cloudabi_fd.c
index 2883adcda3a8..1c9e77a2f21b 100644
--- a/sys/compat/cloudabi/cloudabi_fd.c
+++ b/sys/compat/cloudabi/cloudabi_fd.c
@@ -389,7 +389,7 @@ cloudabi_sys_fd_stat_get(struct thread *td,
 	int error, oflags;
 
 	/* Obtain file descriptor properties. */
-	error = fget_cap(td, uap->fd, cap_rights_init(&rights), &fp,
+	error = fget_cap(td, uap->fd, cap_rights_init(&rights), NULL, &fp,
 	    &fcaps);
 	if (error != 0)
 		return (error);
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 44dfc173f49b..b0ce63b2bb1c 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -515,7 +515,7 @@ fdesc_setattr(struct vop_setattr_args *ap)
 		    cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
 	} else {
 		error = getvnode_path(td, fd,
-		    cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
+		    cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp);
 	}
 	if (error) {
 		/*
@@ -652,7 +652,7 @@ fdesc_readlink(struct vop_readlink_args *va)
 	VOP_UNLOCK(vn);
 
 	td = curthread;
-	error = fget_cap(td, fd_fd, &cap_no_rights, &fp, NULL);
+	error = fget_cap(td, fd_fd, &cap_no_rights, NULL, &fp, NULL);
 	if (error != 0)
 		goto out;
 
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index f073fc64e0bc..2412c5e4e332 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -114,8 +114,8 @@ static void	fdgrowtable_exp(struct filedesc *fdp, int nfd);
 static void	fdunused(struct filedesc *fdp, int fd);
 static void	fdused(struct filedesc *fdp, int fd);
 static int	fget_unlocked_seq(struct filedesc *fdp, int fd,
-		    const cap_rights_t *needrightsp, struct file **fpp,
-		    seqc_t *seqp);
+		    const cap_rights_t *needrightsp, uint8_t *flagsp,
+		    struct file **fpp, seqc_t *seqp);
 static int	getmaxfd(struct thread *td);
 static u_long	*filecaps_copy_prep(const struct filecaps *src);
 static void	filecaps_copy_finish(const struct filecaps *src,
@@ -520,7 +520,9 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
 		fde = fdeget_locked(fdp, fd);
 		if (fde != NULL) {
 			td->td_retval[0] =
-			    (fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0;
+			    ((fde->fde_flags & UF_EXCLOSE) ? FD_CLOEXEC : 0) |
+			    ((fde->fde_flags & UF_RESOLVE_BENEATH) ?
+			    FD_RESOLVE_BENEATH : 0);
 			error = 0;
 		}
 		FILEDESC_SUNLOCK(fdp);
@@ -531,8 +533,13 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
 		FILEDESC_XLOCK(fdp);
 		fde = fdeget_locked(fdp, fd);
 		if (fde != NULL) {
+			/*
+			 * UF_RESOLVE_BENEATH is sticky and cannot be cleared.
+			 */
 			fde->fde_flags = (fde->fde_flags & ~UF_EXCLOSE) |
-			    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
+			    ((arg & FD_CLOEXEC) != 0 ? UF_EXCLOSE : 0) |
+			    ((arg & FD_RESOLVE_BENEATH) != 0 ?
+			    UF_RESOLVE_BENEATH : 0);
 			error = 0;
 		}
 		FILEDESC_XUNLOCK(fdp);
@@ -2159,7 +2166,8 @@ _finstall(struct filedesc *fdp, struct file *fp, int fd, int flags,
 	seqc_write_begin(&fde->fde_seqc);
 #endif
 	fde->fde_file = fp;
-	fde->fde_flags = (flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0;
+	fde->fde_flags = ((flags & O_CLOEXEC) != 0 ? UF_EXCLOSE : 0) |
+	    ((flags & O_RESOLVE_BENEATH) != 0 ? UF_RESOLVE_BENEATH : 0);
 	if (fcaps != NULL)
 		filecaps_move(fcaps, &fde->fde_caps);
 	else
@@ -3012,7 +3020,7 @@ out:
 
 int
 fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
-    struct file **fpp, struct filecaps *havecapsp)
+    uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp)
 {
 	struct filedesc *fdp = td->td_proc->p_fd;
 	int error;
@@ -3026,7 +3034,8 @@ fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
 
 	*fpp = NULL;
 	for (;;) {
-		error = fget_unlocked_seq(fdp, fd, needrightsp, &fp, &seq);
+		error = fget_unlocked_seq(fdp, fd, needrightsp, flagsp, &fp,
+		    &seq);
 		if (error != 0)
 			return (error);
 
@@ -3090,7 +3099,7 @@ fget_remote(struct thread *td, struct proc *p, int fd, struct file **fpp)
 
 #ifdef CAPABILITIES
 int
-fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, int *flagsp)
 {
 	const struct filedescent *fde;
 	const struct fdescenttbl *fdt;
@@ -3100,6 +3109,7 @@ fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsear
 	const cap_rights_t *haverights;
 	cap_rights_t rights;
 	seqc_t seq;
+	int flags;
 
 	VFS_SMR_ASSERT_ENTERED();
 
@@ -3118,7 +3128,9 @@ fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsear
 		return (EAGAIN);
 	if (__predict_false(cap_check_inline_transient(haverights, &rights)))
 		return (EAGAIN);
-	*fsearch = ((fp->f_flag & FSEARCH) != 0);
+	flags = fp->f_flag & FSEARCH;
+	flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ?
+	    O_RESOLVE_BENEATH : 0;
 	vp = fp->f_vnode;
 	if (__predict_false(vp == NULL)) {
 		return (EAGAIN);
@@ -3152,16 +3164,19 @@ fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsear
 #endif
 	}
 	*vpp = vp;
+	*flagsp = flags;
 	return (0);
 }
 #else
 int
-fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch)
+fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, int *flagsp)
 {
+	const struct filedescent *fde;
 	const struct fdescenttbl *fdt;
 	struct filedesc *fdp;
 	struct file *fp;
 	struct vnode *vp;
+	int flags;
 
 	VFS_SMR_ASSERT_ENTERED();
 
@@ -3169,10 +3184,13 @@ fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsear
 	fdt = fdp->fd_files;
 	if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
 		return (EBADF);
-	fp = fdt->fdt_ofiles[fd].fde_file;
+	fde = &fdt->fdt_ofiles[fd];
+	fp = fde->fde_file;
 	if (__predict_false(fp == NULL))
 		return (EAGAIN);
-	*fsearch = ((fp->f_flag & FSEARCH) != 0);
+	flags = fp->f_flag & FSEARCH;
+	flags |= (fde->fde_flags & UF_RESOLVE_BENEATH) != 0 ?
+	    O_RESOLVE_BENEATH : 0;
 	vp = fp->f_vnode;
 	if (__predict_false(vp == NULL || vp->v_type != VDIR)) {
 		return (EAGAIN);
@@ -3187,6 +3205,7 @@ fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsear
 		return (EAGAIN);
 	filecaps_fill(&ndp->ni_filecaps);
 	*vpp = vp;
+	*flagsp = flags;
 	return (0);
 }
 #endif
@@ -3200,13 +3219,15 @@ fgetvp_lookup(int fd, struct nameidata *ndp, struct vnode **vpp)
 	struct componentname *cnp;
 	cap_rights_t rights;
 	int error;
+	uint8_t flags;
 
 	td = curthread;
 	rights = *ndp->ni_rightsneeded;
 	cap_rights_set_one(&rights, CAP_LOOKUP);
 	cnp = &ndp->ni_cnd;
 
-	error = fget_cap(td, ndp->ni_dirfd, &rights, &fp, &ndp->ni_filecaps);
+	error = fget_cap(td, ndp->ni_dirfd, &rights, &flags, &fp,
+	    &ndp->ni_filecaps);
 	if (__predict_false(error != 0))
 		return (error);
 	if (__predict_false(fp->f_ops == &badfileops)) {
@@ -3224,6 +3245,10 @@ fgetvp_lookup(int fd, struct nameidata *ndp, struct vnode **vpp)
 	 */
 	if ((fp->f_flag & FSEARCH) != 0)
 		cnp->cn_flags |= NOEXECCHECK;
+	if ((flags & UF_RESOLVE_BENEATH) != 0) {
+		cnp->cn_flags |= RBENEATH;
+		ndp->ni_resflags |= NIRES_BENEATH;
+	}
 	fdrop(fp, td);
 
 #ifdef CAPABILITIES
@@ -3258,11 +3283,9 @@ out_free:
 
 static int
 fget_unlocked_seq(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
-    struct file **fpp, seqc_t *seqp)
+    uint8_t *flagsp, struct file **fpp, seqc_t *seqp)
 {
-#ifdef CAPABILITIES
 	const struct filedescent *fde;
-#endif
 	const struct fdescenttbl *fdt;
 	struct file *fp;
 #ifdef CAPABILITIES
@@ -3270,6 +3293,7 @@ fget_unlocked_seq(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
 	cap_rights_t haverights;
 	int error;
 #endif
+	uint8_t flags;
 
 	fdt = fdp->fd_files;
 	if (__predict_false((u_int)fd >= fdt->fdt_nfiles))
@@ -3288,10 +3312,13 @@ fget_unlocked_seq(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
 		fde = &fdt->fdt_ofiles[fd];
 		haverights = *cap_rights_fde_inline(fde);
 		fp = fde->fde_file;
+		flags = fde->fde_flags;
 		if (!seqc_consistent(fd_seqc(fdt, fd), seq))
 			continue;
 #else
-		fp = fdt->fdt_ofiles[fd].fde_file;
+		fde = &fdt->fdt_ofiles[fd];
+		flags = fde->fde_flags;
+		fp = fde->fde_file;
 #endif
 		if (fp == NULL)
 			return (EBADF);
@@ -3324,6 +3351,8 @@ fget_unlocked_seq(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
 		fdrop(fp, curthread);
 	}
 	*fpp = fp;
+	if (flagsp != NULL)
+		*flagsp = flags;
 	if (seqp != NULL) {
 #ifdef CAPABILITIES
 		*seqp = seq;
@@ -3340,8 +3369,8 @@ fget_unlocked_seq(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
  * racing with itself.
  */
 int
-fget_unlocked(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
-    struct file **fpp)
+fget_unlocked_flags(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
+    uint8_t *flagsp, struct file **fpp)
 {
 #ifdef CAPABILITIES
 	const struct filedescent *fde;
@@ -3352,6 +3381,7 @@ fget_unlocked(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
 	seqc_t seq;
 	const cap_rights_t *haverights;
 #endif
+	uint8_t flags;
 
 	fdt = fdp->fd_files;
 	if (__predict_false((u_int)fd >= fdt->fdt_nfiles)) {
@@ -3363,8 +3393,10 @@ fget_unlocked(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
 	fde = &fdt->fdt_ofiles[fd];
 	haverights = cap_rights_fde_inline(fde);
 	fp = fde->fde_file;
+	flags = fde->fde_flags;
 #else
 	fp = fdt->fdt_ofiles[fd].fde_file;
+	flags = fdt->fdt_ofiles[fd].fde_flags;
 #endif
 	if (__predict_false(fp == NULL))
 		goto out_fallback;
@@ -3388,12 +3420,21 @@ fget_unlocked(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
 #endif
 		goto out_fdrop;
 	*fpp = fp;
+	if (flagsp != NULL)
+		*flagsp = flags;
 	return (0);
 out_fdrop:
 	fdrop(fp, curthread);
 out_fallback:
 	*fpp = NULL;
-	return (fget_unlocked_seq(fdp, fd, needrightsp, fpp, NULL));
+	return (fget_unlocked_seq(fdp, fd, needrightsp, flagsp, fpp, NULL));
+}
+
+int
+fget_unlocked(struct filedesc *fdp, int fd, const cap_rights_t *needrightsp,
+    struct file **fpp)
+{
+	return (fget_unlocked_flags(fdp, fd, needrightsp, NULL, fpp));
 }
 
 /*
@@ -3547,7 +3588,7 @@ fget_mmap(struct thread *td, int fd, const cap_rights_t *rightsp,
 	fdp = td->td_proc->p_fd;
 	MPASS(cap_rights_is_set(rightsp, CAP_MMAP));
 	for (;;) {
-		error = fget_unlocked_seq(fdp, fd, rightsp, &fp, &seq);
+		error = fget_unlocked_seq(fdp, fd, rightsp, NULL, &fp, &seq);
 		if (__predict_false(error != 0))
 			return (error);
 		if (__predict_false(fp->f_ops == &badfileops)) {
@@ -3602,7 +3643,7 @@ fget_fcntl(struct thread *td, int fd, const cap_rights_t *rightsp,
 	*fpp = NULL;
 	MPASS(cap_rights_is_set(rightsp, CAP_FCNTL));
 	for (;;) {
-		error = fget_unlocked_seq(fdp, fd, rightsp, &fp, &seq);
+		error = fget_unlocked_seq(fdp, fd, rightsp, NULL, &fp, &seq);
 		if (error != 0)
 			return (error);
 		error = cap_fcntl_check(fdp, fd, needfcntl);
@@ -3664,7 +3705,7 @@ fgetvp_rights(struct thread *td, int fd, const cap_rights_t *needrightsp,
 	struct file *fp;
 	int error;
 
-	error = fget_cap(td, fd, needrightsp, &fp, &caps);
+	error = fget_cap(td, fd, needrightsp, NULL, &fp, &caps);
 	if (error != 0)
 		return (error);
 	if (fp->f_ops == &badfileops) {
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 336a0dd77d5c..65ea38dc87f5 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -97,7 +97,7 @@ getsock_cap(struct thread *td, int fd, const cap_rights_t *rightsp,
 	struct file *fp;
 	int error;
 
-	error = fget_cap(td, fd, rightsp, &fp, havecapsp);
+	error = fget_cap(td, fd, rightsp, NULL, &fp, havecapsp);
 	if (error != 0)
 		return (error);
 	if (fp->f_type != DTYPE_SOCKET) {
diff --git a/sys/kern/vfs_acl.c b/sys/kern/vfs_acl.c
index ea250104aff4..3120ff007044 100644
--- a/sys/kern/vfs_acl.c
+++ b/sys/kern/vfs_acl.c
@@ -433,7 +433,7 @@ sys___acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap)
 
 	AUDIT_ARG_FD(uap->filedes);
 	error = getvnode_path(td, uap->filedes,
-	    cap_rights_init_one(&rights, CAP_ACL_GET), &fp);
+	    cap_rights_init_one(&rights, CAP_ACL_GET), NULL, &fp);
 	if (error == 0) {
 		error = vacl_get_acl(td, fp->f_vnode, uap->type, uap->aclp);
 		fdrop(fp, td);
@@ -566,7 +566,7 @@ sys___acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap)
 
 	AUDIT_ARG_FD(uap->filedes);
 	error = getvnode_path(td, uap->filedes,
-	    cap_rights_init_one(&rights, CAP_ACL_CHECK), &fp);
+	    cap_rights_init_one(&rights, CAP_ACL_CHECK), NULL, &fp);
 	if (error == 0) {
 		error = vacl_aclcheck(td, fp->f_vnode, uap->type, uap->aclp);
 		fdrop(fp, td);
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 0afb65c55c5a..a7f8179ceb55 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -4365,17 +4365,23 @@ cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp)
 {
 	struct nameidata *ndp;
 	struct componentname *cnp;
-	int error;
-	bool fsearch;
+	int error, flags;
 
 	ndp = fpl->ndp;
 	cnp = fpl->cnp;
 
-	error = fgetvp_lookup_smr(ndp->ni_dirfd, ndp, vpp, &fsearch);
+	error = fgetvp_lookup_smr(ndp->ni_dirfd, ndp, vpp, &flags);
 	if (__predict_false(error != 0)) {
 		return (cache_fpl_aborted(fpl));
 	}
-	fpl->fsearch = fsearch;
+	if (__predict_false((flags & O_RESOLVE_BENEATH) != 0)) {
+		_Static_assert((CACHE_FPL_SUPPORTED_CN_FLAGS & RBENEATH) == 0,
+		    "RBENEATH supported by fplookup");
+		cache_fpl_smr_exit(fpl);
+		cache_fpl_aborted(fpl);
+		return (EOPNOTSUPP);
+	}
+	fpl->fsearch = (flags & FSEARCH) != 0;
 	if ((*vpp)->v_type != VDIR) {
 		if (!((cnp->cn_flags & EMPTYPATH) != 0 && cnp->cn_pnbuf[0] == '\0')) {
 			cache_fpl_smr_exit(fpl);
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index c9007c308b58..664d0b4edc5f 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -241,7 +241,7 @@ sys_extattr_set_fd(struct thread *td, struct extattr_set_fd_args *uap)
 	AUDIT_ARG_TEXT(attrname);
 
 	error = getvnode_path(td, uap->fd,
-	    cap_rights_init_one(&rights, CAP_EXTATTR_SET), &fp);
+	    cap_rights_init_one(&rights, CAP_EXTATTR_SET), NULL, &fp);
 	if (error)
 		return (error);
 
@@ -408,7 +408,7 @@ sys_extattr_get_fd(struct thread *td, struct extattr_get_fd_args *uap)
 	AUDIT_ARG_TEXT(attrname);
 
 	error = getvnode_path(td, uap->fd,
-	    cap_rights_init_one(&rights, CAP_EXTATTR_GET), &fp);
+	    cap_rights_init_one(&rights, CAP_EXTATTR_GET), NULL, &fp);
 	if (error)
 		return (error);
 
@@ -543,7 +543,7 @@ sys_extattr_delete_fd(struct thread *td, struct extattr_delete_fd_args *uap)
 	AUDIT_ARG_TEXT(attrname);
 
 	error = getvnode_path(td, uap->fd,
-	    cap_rights_init_one(&rights, CAP_EXTATTR_DELETE), &fp);
+	    cap_rights_init_one(&rights, CAP_EXTATTR_DELETE), NULL, &fp);
 	if (error)
 		return (error);
 
@@ -689,7 +689,7 @@ sys_extattr_list_fd(struct thread *td, struct extattr_list_fd_args *uap)
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_VALUE(uap->attrnamespace);
 	error = getvnode_path(td, uap->fd,
-	    cap_rights_init_one(&rights, CAP_EXTATTR_LIST), &fp);
+	    cap_rights_init_one(&rights, CAP_EXTATTR_LIST), NULL, &fp);
 	if (error)
 		return (error);
 
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 75568e858b78..e7353d1f5f9a 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -371,7 +371,7 @@ kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
 	int error;
 
 	AUDIT_ARG_FD(fd);
-	error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp);
+	error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
@@ -884,12 +884,17 @@ sys_fchdir(struct thread *td, struct fchdir_args *uap)
 	struct mount *mp;
 	struct file *fp;
 	int error;
+	uint8_t fdflags;
 
 	AUDIT_ARG_FD(uap->fd);
-	error = getvnode_path(td, uap->fd, &cap_fchdir_rights,
+	error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags,
 	    &fp);
 	if (error != 0)
 		return (error);
+	if ((fdflags & UF_RESOLVE_BENEATH) != 0) {
+		fdrop(fp, td);
+		return (ENOTCAPABLE);
+	}
 	vp = fp->f_vnode;
 	vrefact(vp);
 	fdrop(fp, td);
@@ -1240,6 +1245,10 @@ success:
 		else
 #endif
 			fcaps = NULL;
+		if ((nd.ni_resflags & NIRES_BENEATH) != 0)
+			flags |= O_RESOLVE_BENEATH;
+		else
+			flags &= ~O_RESOLVE_BENEATH;
 		error = finstall_refed(td, fp, &indx, flags, fcaps);
 		/* On success finstall_refed() consumes fcaps. */
 		if (error != 0) {
@@ -1930,7 +1939,7 @@ kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
 
 	fp = NULL;
 	if (fd != FD_NONE) {
-		error = getvnode_path(td, fd, &cap_no_rights, &fp);
+		error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp);
 		if (error != 0)
 			return (error);
 	}
@@ -4313,12 +4322,13 @@ out:
  */
 int
 getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp,
-    struct file **fpp)
+    uint8_t *flagsp, struct file **fpp)
 {
 	struct file *fp;
 	int error;
 
-	error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp);
+	error = fget_unlocked_flags(td->td_proc->p_fd, fd, rightsp, flagsp,
+	    &fp);
 	if (error != 0)
 		return (error);
 
@@ -4355,7 +4365,7 @@ getvnode(struct thread *td, int fd, const cap_rights_t *rightsp,
 {
 	int error;
 
-	error = getvnode_path(td, fd, rightsp, fpp);
+	error = getvnode_path(td, fd, rightsp, NULL, fpp);
 
 	/*
 	 * Filter out O_PATH file descriptors, most getvnode() callers
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index ef3e2fbe3ca1..2b7a14027b57 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -280,6 +280,8 @@ typedef	__pid_t		pid_t;
 
 /* file descriptor flags (F_GETFD, F_SETFD) */
 #define	FD_CLOEXEC	1		/* close-on-exec flag */
+#define	FD_RESOLVE_BENEATH 2		/* all lookups relative to fd have
+					   O_RESOLVE_BENEATH semantics */
 
 /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */
 #define	F_RDLCK		1		/* shared or read lock */
diff --git a/sys/sys/file.h b/sys/sys/file.h
index 4c0ec276c700..d3e4a29d0803 100644
--- a/sys/sys/file.h
+++ b/sys/sys/file.h
@@ -292,7 +292,7 @@ int fgetvp_read(struct thread *td, int fd, const cap_rights_t *rightsp,
     struct vnode **vpp);
 int fgetvp_write(struct thread *td, int fd, const cap_rights_t *rightsp,
     struct vnode **vpp);
-int fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, bool *fsearch);
+int fgetvp_lookup_smr(int fd, struct nameidata *ndp, struct vnode **vpp, int *flagsp);
 int fgetvp_lookup(int fd, struct nameidata *ndp, struct vnode **vpp);
 
 static __inline __result_use_check bool
diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h
index bf7a27e37161..a39716fdce0b 100644
--- a/sys/sys/filedesc.h
+++ b/sys/sys/filedesc.h
@@ -136,6 +136,7 @@ struct filedesc_to_leader {
  * Per-process open flags.
  */
 #define	UF_EXCLOSE	0x01		/* auto-close on exec */
+#define	UF_RESOLVE_BENEATH 0x02		/* lookups must be beneath this dir */
 
 #ifdef _KERNEL
 
@@ -270,17 +271,20 @@ struct filedesc_to_leader *
 int	getvnode(struct thread *td, int fd, const cap_rights_t *rightsp,
 	    struct file **fpp);
 int	getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp,
-	    struct file **fpp);
+	    uint8_t *flagsp, struct file **fpp);
 void	mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
 
 int	fget_cap_locked(struct filedesc *fdp, int fd,
 	    const cap_rights_t *needrightsp, struct file **fpp,
 	    struct filecaps *havecapsp);
 int	fget_cap(struct thread *td, int fd, const cap_rights_t *needrightsp,
-	    struct file **fpp, struct filecaps *havecapsp);
+	    uint8_t *flagsp, struct file **fpp, struct filecaps *havecapsp);
 /* Return a referenced file from an unlocked descriptor. */
 int	fget_unlocked(struct filedesc *fdp, int fd,
 	    const cap_rights_t *needrightsp, struct file **fpp);
+int	fget_unlocked_flags(struct filedesc *fdp, int fd,
+	    const cap_rights_t *needrightsp, uint8_t *flagsp,
+	    struct file **fpp);
 /* Return a file pointer without a ref. FILEDESC_IS_ONLY_USER must be true.  */
 int	fget_only_user(struct filedesc *fdp, int fd,
 	    const cap_rights_t *needrightsp, struct file **fpp);
diff --git a/sys/sys/namei.h b/sys/sys/namei.h
index 8ae9f0d786d6..51aa9a6e4f67 100644
--- a/sys/sys/namei.h
+++ b/sys/sys/namei.h
@@ -208,6 +208,7 @@ int	cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
 #define	NIRES_ABS	0x00000001 /* Path was absolute */
 #define	NIRES_STRICTREL	0x00000002 /* Restricted lookup result */
 #define	NIRES_EMPTYPATH	0x00000004 /* EMPTYPATH used */
+#define	NIRES_BENEATH	0x00000008 /* O_RESOLVE_BENEATH is to be inherited */
 
 /*
  * Flags in ni_lcf, valid for the duration of the namei call.


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?698a470f.40327.2604f8c6>