Skip site navigation (1) Skip section navigation (2)
Date:      Sat, 13 Aug 2011 09:21:16 +0000 (UTC)
From:      Jonathan Anderson <jonathan@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r224810 - in head/sys: kern sys
Message-ID:  <201108130921.p7D9LGIK005244@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jonathan
Date: Sat Aug 13 09:21:16 2011
New Revision: 224810
URL: http://svn.freebsd.org/changeset/base/224810

Log:
  Allow Capsicum capabilities to delegate constrained
  access to file system subtrees to sandboxed processes.
  
  - Use of absolute paths and '..' is limited in capability mode.
  - Use of absolute paths and '..' is limited when looking up relative
    to a capability.
  - When a name lookup is performed, identify which operation is to be
    performed (such as CAP_MKDIR) and check for that right as well as
    for CAP_LOOKUP.
  
  With these constraints, openat() and friends are now safe in capability
  mode, and can then be used by code such as the capability-mode runtime
  linker.
  
  Approved by: re (bz), mentor (rwatson)
  Sponsored by: Google Inc

Modified:
  head/sys/kern/kern_descrip.c
  head/sys/kern/sys_capability.c
  head/sys/kern/vfs_lookup.c
  head/sys/kern/vfs_syscalls.c
  head/sys/sys/capability.h
  head/sys/sys/namei.h

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c	Sat Aug 13 00:56:42 2011	(r224809)
+++ head/sys/kern/kern_descrip.c	Sat Aug 13 09:21:16 2011	(r224810)
@@ -2336,6 +2336,16 @@ _fget(struct thread *td, int fd, struct 
 
 #ifdef CAPABILITIES
 	/*
+	 * If this is a capability, what rights does it have?
+	 */
+	if (haverightsp != NULL) {
+		if (fp->f_type == DTYPE_CAPABILITY)
+			*haverightsp = cap_rights(fp);
+		else
+			*haverightsp = CAP_MASK_VALID;
+	}
+
+	/*
 	 * If a capability has been requested, return the capability directly.
 	 * Otherwise, check capability rights, extract the underlying object,
 	 * and check its access flags.

Modified: head/sys/kern/sys_capability.c
==============================================================================
--- head/sys/kern/sys_capability.c	Sat Aug 13 00:56:42 2011	(r224809)
+++ head/sys/kern/sys_capability.c	Sat Aug 13 09:21:16 2011	(r224810)
@@ -220,7 +220,7 @@ cap_new(struct thread *td, struct cap_ne
 {
 	int error, capfd;
 	int fd = uap->fd;
-	struct file *fp, *fcapp;
+	struct file *fp;
 	cap_rights_t rights = uap->rights;
 
 	AUDIT_ARG_FD(fd);
@@ -229,7 +229,7 @@ cap_new(struct thread *td, struct cap_ne
 	if (error)
 		return (error);
 	AUDIT_ARG_FILE(td->td_proc, fp);
-	error = kern_capwrap(td, fp, rights, &fcapp, &capfd);
+	error = kern_capwrap(td, fp, rights, &capfd);
 	if (error)
 		return (error);
 
@@ -267,10 +267,10 @@ cap_getrights(struct thread *td, struct 
  */
 int
 kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
-    struct file **fcappp, int *capfdp)
+    int *capfdp)
 {
 	struct capability *cp, *cp_old;
-	struct file *fp_object;
+	struct file *fp_object, *fcapp;
 	int error;
 
 	if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID)
@@ -290,7 +290,7 @@ kern_capwrap(struct thread *td, struct f
 	/*
 	 * Allocate a new file descriptor to hang the capability off of.
 	 */
-	error = falloc(td, fcappp, capfdp, fp->f_flag);
+	error = falloc(td, &fcapp, capfdp, fp->f_flag);
 	if (error)
 		return (error);
 
@@ -309,18 +309,18 @@ kern_capwrap(struct thread *td, struct f
 	cp = uma_zalloc(capability_zone, M_WAITOK | M_ZERO);
 	cp->cap_rights = rights;
 	cp->cap_object = fp_object;
-	cp->cap_file = *fcappp;
+	cp->cap_file = fcapp;
 	if (fp->f_flag & DFLAG_PASSABLE)
-		finit(*fcappp, fp->f_flag, DTYPE_CAPABILITY, cp,
+		finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
 		    &capability_ops);
 	else
-		finit(*fcappp, fp->f_flag, DTYPE_CAPABILITY, cp,
+		finit(fcapp, fp->f_flag, DTYPE_CAPABILITY, cp,
 		    &capability_ops_unpassable);
 
 	/*
 	 * Release our private reference (the proc filedesc still has one).
 	 */
-	fdrop(*fcappp, td);
+	fdrop(fcapp, td);
 	return (0);
 }
 

Modified: head/sys/kern/vfs_lookup.c
==============================================================================
--- head/sys/kern/vfs_lookup.c	Sat Aug 13 00:56:42 2011	(r224809)
+++ head/sys/kern/vfs_lookup.c	Sat Aug 13 09:21:16 2011	(r224810)
@@ -180,6 +180,18 @@ namei(struct nameidata *ndp)
 	if (!error && *cnp->cn_pnbuf == '\0')
 		error = ENOENT;
 
+#ifdef CAPABILITY_MODE
+	/*
+	 * In capability mode, lookups must be "strictly relative" (i.e.
+	 * not an absolute path, and not containing '..' components) to
+	 * a real file descriptor, not the pseudo-descriptor AT_FDCWD.
+	 */
+	if (IN_CAPABILITY_MODE(td)) {
+		ndp->ni_strictrelative = 1;
+		if (ndp->ni_dirfd == AT_FDCWD)
+			error = ECAPMODE;
+	}
+#endif
 	if (error) {
 		uma_zfree(namei_zone, cnp->cn_pnbuf);
 #ifdef DIAGNOSTIC
@@ -214,12 +226,20 @@ namei(struct nameidata *ndp)
 				AUDIT_ARG_ATFD1(ndp->ni_dirfd);
 			if (cnp->cn_flags & AUDITVNODE2)
 				AUDIT_ARG_ATFD2(ndp->ni_dirfd);
-#ifdef CAPABILITY_MODE
-			KASSERT(!IN_CAPABILITY_MODE(td),
-			    ("%s: reached %s:%d in capability mode",
-			     __func__, __FILE__, __LINE__));
+			error = fgetvp_rights(td, ndp->ni_dirfd,
+			    ndp->ni_rightsneeded | CAP_LOOKUP,
+			    &(ndp->ni_baserights), &dp);
+#ifdef CAPABILITIES
+			/*
+			 * Lookups relative to a capability must also be
+			 * strictly relative.
+			 *
+			 * Note that a capability with rights CAP_MASK_VALID
+			 * is treated exactly like a regular file descriptor.
+			 */
+			if (ndp->ni_baserights != CAP_MASK_VALID)
+				ndp->ni_strictrelative = 1;
 #endif
-			error = fgetvp(td, ndp->ni_dirfd, 0, &dp);
 		}
 		if (error != 0 || dp != NULL) {
 			FILEDESC_SUNLOCK(fdp);
@@ -261,6 +281,8 @@ namei(struct nameidata *ndp)
 		if (*(cnp->cn_nameptr) == '/') {
 			vrele(dp);
 			VFS_UNLOCK_GIANT(vfslocked);
+			if (ndp->ni_strictrelative != 0)
+				return (ENOTCAPABLE);
 			while (*(cnp->cn_nameptr) == '/') {
 				cnp->cn_nameptr++;
 				ndp->ni_pathlen--;
@@ -604,7 +626,10 @@ dirloop:
 	}
 
 	/*
-	 * Handle "..": four special cases.
+	 * Handle "..": five special cases.
+	 * 0. If doing a capability lookup, return ENOTCAPABLE (this is a
+	 *    fairly conservative design choice, but it's the only one that we
+	 *    are satisfied guarantees the property we're looking for).
 	 * 1. Return an error if this is the last component of
 	 *    the name and the operation is DELETE or RENAME.
 	 * 2. If at root directory (e.g. after chroot)
@@ -618,6 +643,10 @@ dirloop:
 	 *    the jail or chroot, don't let them out.
 	 */
 	if (cnp->cn_flags & ISDOTDOT) {
+		if (ndp->ni_strictrelative != 0) {
+			error = ENOTCAPABLE;
+			goto bad;
+		}
 		if ((cnp->cn_flags & ISLASTCN) != 0 &&
 		    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
 			error = EINVAL;

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c	Sat Aug 13 00:56:42 2011	(r224809)
+++ head/sys/kern/vfs_syscalls.c	Sat Aug 13 09:21:16 2011	(r224810)
@@ -993,6 +993,41 @@ change_root(vp, td)
 	return (0);
 }
 
+static __inline cap_rights_t
+flags_to_rights(int flags)
+{
+	cap_rights_t rights = 0;	/* CAP_* bits implied by 'flags' */
+	/* Translate open-style O_* flags into the capability rights they need. */
+	switch ((flags & O_ACCMODE)) {	/* access mode picks the base rights */
+	case O_RDONLY:
+		rights |= CAP_READ;
+		break;
+
+	case O_RDWR:
+		rights |= CAP_READ;
+		/* fall through: O_RDWR needs CAP_WRITE in addition to CAP_READ */
+
+	case O_WRONLY:
+		rights |= CAP_WRITE;
+		break;
+
+	case O_EXEC:	/* NOTE(review): reachable only if O_EXEC lies within O_ACCMODE - confirm */
+		rights |= CAP_FEXECVE;
+		break;
+	}	/* no default: an unmatched access mode adds no right here */
+
+	if (flags & O_CREAT)	/* creating the file */
+		rights |= CAP_CREATE;
+
+	if (flags & O_TRUNC)	/* truncating on open */
+		rights |= CAP_FTRUNCATE;
+
+	if ((flags & O_EXLOCK) || (flags & O_SHLOCK))	/* either lock-on-open flag */
+		rights |= CAP_FLOCK;
+
+	return (rights);	/* the caller ORs this into its CAP_LOOKUP requirement */
+}
+
 /*
  * Check permissions, allocate an open file structure, and call the device
  * open routine if any.
@@ -1055,10 +1090,12 @@ kern_openat(struct thread *td, int fd, c
 	struct flock lf;
 	struct nameidata nd;
 	int vfslocked;
+	cap_rights_t rights_needed = CAP_LOOKUP;
 
 	AUDIT_ARG_FFLAGS(flags);
 	AUDIT_ARG_MODE(mode);
 	/* XXX: audit dirfd */
+	rights_needed |= flags_to_rights(flags);
 	/*
 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 	 * may be specified.
@@ -1082,8 +1119,8 @@ kern_openat(struct thread *td, int fd, c
 	/* Set the flags early so the finit in devfs can pick them up. */
 	fp->f_flag = flags & FMASK;
 	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;
-	NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd,
-	    td);
+	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg,
+	    path, fd, rights_needed, td);
 	td->td_dupfd = -1;		/* XXX check for fdopen */
 	error = vn_open(&nd, &flags, cmode, fp);
 	if (error) {
@@ -1092,18 +1129,20 @@ kern_openat(struct thread *td, int fd, c
 		 * wonderous happened deep below and we just pass it up
 		 * pretending we know what we do.
 		 */
-		if (error == ENXIO && fp->f_ops != &badfileops) {
-			fdrop(fp, td);
-			td->td_retval[0] = indx;
-			return (0);
-		}
+		if (error == ENXIO && fp->f_ops != &badfileops)
+			goto success;
 
 		/*
 		 * handle special fdopen() case.  bleh.  dupfdopen() is
 		 * responsible for dropping the old contents of ofiles[indx]
 		 * if it succeeds.
+		 *
+		 * Don't do this for relative (capability) lookups; we don't
+		 * understand exactly what would happen, and we don't think
+		 * that it ever should.
 		 */
-		if ((error == ENODEV || error == ENXIO) &&
+		if ((nd.ni_strictrelative == 0) &&
+		    (error == ENODEV || error == ENXIO) &&
 		    (td->td_dupfd >= 0)) {
 			/* XXX from fdopen */
 			if ((error = finstall(td, fp, &indx, flags)) != 0)
@@ -1172,9 +1211,22 @@ success:
 	/*
 	 * If we haven't already installed the FD (for dupfdopen), do so now.
 	 */
-	if (indx == -1)
-		if ((error = finstall(td, fp, &indx, flags)) != 0)
-			goto bad_unlocked;
+	if (indx == -1) {
+#ifdef CAPABILITIES
+		if (nd.ni_strictrelative == 1) {
+			/*
+			 * We are doing a strict relative lookup; wrap the
+			 * result in a capability.
+			 */
+			if ((error = kern_capwrap(td, fp, nd.ni_baserights,
+			    &indx)) != 0)
+				goto bad_unlocked;
+		} else
+#endif
+			if ((error = finstall(td, fp, &indx, flags)) != 0)
+				goto bad_unlocked;
+
+	}
 
 	/*
 	 * Release our private reference, leaving the one associated with
@@ -1301,8 +1353,9 @@ kern_mknodat(struct thread *td, int fd, 
 		return (error);
 restart:
 	bwillwrite();
-	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
-	    pathseg, path, fd, td);
+	NDINIT_ATRIGHTS(&nd, CREATE,
+	    LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1, pathseg, path, fd,
+	    CAP_MKFIFO, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vfslocked = NDHASGIANT(&nd);
@@ -2153,8 +2206,8 @@ kern_accessat(struct thread *td, int fd,
 	} else
 		cred = tmpcred = td->td_ucred;
 	AUDIT_ARG_VALUE(mode);
-	NDINIT_AT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
-	    AUDITVNODE1, pathseg, path, fd, td);
+	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE |
+	    AUDITVNODE1, pathseg, path, fd, CAP_FSTAT, td);
 	if ((error = namei(&nd)) != 0)
 		goto out1;
 	vfslocked = NDHASGIANT(&nd);
@@ -2363,9 +2416,9 @@ kern_statat_vnhook(struct thread *td, in
 	if (flag & ~AT_SYMLINK_NOFOLLOW)
 		return (EINVAL);
 
-	NDINIT_AT(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
+	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 	    FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | MPSAFE, pathseg,
-	    path, fd, td);
+	    path, fd, CAP_FSTAT, td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
@@ -2920,8 +2973,8 @@ kern_fchmodat(struct thread *td, int fd,
 
 	AUDIT_ARG_MODE(mode);
 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
-	NDINIT_AT(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg, path,
-	    fd, td);
+	NDINIT_ATRIGHTS(&nd, LOOKUP,  follow | MPSAFE | AUDITVNODE1, pathseg,
+	    path, fd, CAP_FCHMOD, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vfslocked = NDHASGIANT(&nd);
@@ -3063,8 +3116,8 @@ kern_fchownat(struct thread *td, int fd,
 
 	AUDIT_ARG_OWNER(uid, gid);
 	follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW;
-	NDINIT_AT(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg, path,
-	    fd, td);
+	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | MPSAFE | AUDITVNODE1, pathseg,
+	    path, fd, CAP_FCHOWN, td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
@@ -3279,8 +3332,8 @@ kern_utimesat(struct thread *td, int fd,
 
 	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
 		return (error);
-	NDINIT_AT(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg, path,
-	    fd, td);
+	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | MPSAFE | AUDITVNODE1, pathseg,
+	    path, fd, CAP_FUTIMES, td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
@@ -3610,11 +3663,11 @@ kern_renameat(struct thread *td, int old
 
 	bwillwrite();
 #ifdef MAC
-	NDINIT_AT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE |
-	    AUDITVNODE1, pathseg, old, oldfd, td);
+	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
+	    MPSAFE | AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 #else
-	NDINIT_AT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
-	    AUDITVNODE1, pathseg, old, oldfd, td);
+	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE |
+	    AUDITVNODE1, pathseg, old, oldfd, CAP_DELETE, td);
 #endif
 
 	if ((error = namei(&fromnd)) != 0)
@@ -3637,8 +3690,9 @@ kern_renameat(struct thread *td, int old
 		vrele(fvp);
 		goto out1;
 	}
-	NDINIT_AT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART |
-	    MPSAFE | AUDITVNODE2, pathseg, new, newfd, td);
+	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
+	    SAVESTART | MPSAFE | AUDITVNODE2, pathseg, new, newfd, CAP_CREATE,
+	    td);
 	if (fromnd.ni_vp->v_type == VDIR)
 		tond.ni_cnd.cn_flags |= WILLBEDIR;
 	if ((error = namei(&tond)) != 0) {
@@ -3764,8 +3818,8 @@ kern_mkdirat(struct thread *td, int fd, 
 	AUDIT_ARG_MODE(mode);
 restart:
 	bwillwrite();
-	NDINIT_AT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE | AUDITVNODE1,
-	    segflg, path, fd, td);
+	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE |
+	    AUDITVNODE1, segflg, path, fd, CAP_MKDIR, td);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
 	if ((error = namei(&nd)) != 0)
 		return (error);
@@ -3853,8 +3907,8 @@ kern_rmdirat(struct thread *td, int fd, 
 
 restart:
 	bwillwrite();
-	NDINIT_AT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE | AUDITVNODE1,
-	    pathseg, path, fd, td);
+	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE |
+	    AUDITVNODE1, pathseg, path, fd, CAP_RMDIR, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vfslocked = NDHASGIANT(&nd);

Modified: head/sys/sys/capability.h
==============================================================================
--- head/sys/sys/capability.h	Sat Aug 13 00:56:42 2011	(r224809)
+++ head/sys/sys/capability.h	Sat Aug 13 09:21:16 2011	(r224810)
@@ -142,7 +142,7 @@
  * Create a capability to wrap a file object.
  */
 int	kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights,
-	    struct file **cap, int *capfd);
+	    int *capfd);
 
 /*
  * Unwrap a capability if its rights mask is a superset of 'rights'.

Modified: head/sys/sys/namei.h
==============================================================================
--- head/sys/sys/namei.h	Sat Aug 13 00:56:42 2011	(r224809)
+++ head/sys/sys/namei.h	Sat Aug 13 09:21:16 2011	(r224810)
@@ -63,6 +63,7 @@ struct nameidata {
 	 */
 	const	char *ni_dirp;		/* pathname pointer */
 	enum	uio_seg ni_segflg;	/* location of pathname */
+	cap_rights_t ni_rightsneeded;	/* rights required to look up vnode */
 	/*
 	 * Arguments to lookup.
 	 */
@@ -70,6 +71,11 @@ struct nameidata {
 	struct	vnode *ni_rootdir;	/* logical root directory */
 	struct	vnode *ni_topdir;	/* logical top directory */
 	int	ni_dirfd;		/* starting directory for *at functions */
+	int	ni_strictrelative;	/* relative lookup only; no '..' */
+	/*
+	 * Results: returned from namei
+	 */
+	cap_rights_t ni_baserights;	/* rights the *at base has (or -1) */
 	/*
 	 * Results: returned from/manipulated by lookup
 	 */
@@ -151,11 +157,13 @@ struct nameidata {
  * Initialization of a nameidata structure.
  */
 #define	NDINIT(ndp, op, flags, segflg, namep, td)			\
-	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, td)
+	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, 0, td)
 #define	NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td)		\
-	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, td)
+	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, 0, td)
+#define	NDINIT_ATRIGHTS(ndp, op, flags, segflg, namep, dirfd, rights, td) \
+	NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, rights, td)
 #define	NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td)		\
-	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, td)
+	NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, 0, td)
 
 static __inline void
 NDINIT_ALL(struct nameidata *ndp,
@@ -164,6 +172,7 @@ NDINIT_ALL(struct nameidata *ndp,
 	const char *namep,
 	int dirfd,
 	struct vnode *startdir,
+	cap_rights_t rights,
 	struct thread *td)
 {
 	ndp->ni_cnd.cn_nameiop = op;
@@ -172,6 +181,9 @@ NDINIT_ALL(struct nameidata *ndp,
 	ndp->ni_dirp = namep;
 	ndp->ni_dirfd = dirfd;
 	ndp->ni_startdir = startdir;
+	ndp->ni_strictrelative = 0;
+	ndp->ni_rightsneeded = rights;
+	ndp->ni_baserights = 0;
 	ndp->ni_cnd.cn_thread = td;
 }
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201108130921.p7D9LGIK005244>