From owner-p4-projects@FreeBSD.ORG Wed Nov 25 15:14:55 2009 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 1ADE11065670; Wed, 25 Nov 2009 15:14:55 +0000 (UTC) Delivered-To: perforce@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id B9B22106566B for ; Wed, 25 Nov 2009 15:14:54 +0000 (UTC) (envelope-from jona@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id A5ABE8FC0A for ; Wed, 25 Nov 2009 15:14:54 +0000 (UTC) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id nAPFEsoS088566 for ; Wed, 25 Nov 2009 15:14:54 GMT (envelope-from jona@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id nAPFEsiF088564 for perforce@freebsd.org; Wed, 25 Nov 2009 15:14:54 GMT (envelope-from jona@FreeBSD.org) Date: Wed, 25 Nov 2009 15:14:54 GMT Message-Id: <200911251514.nAPFEsiF088564@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to jona@FreeBSD.org using -f From: Jonathan Anderson To: Perforce Change Reviews Precedence: bulk Cc: Subject: PERFORCE change 171017 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 25 Nov 2009 15:14:55 -0000 http://p4web.freebsd.org/chv.cgi?CH=171017 Change 171017 by jona@jona-capsicum-kent on 2009/11/25 15:14:52 Add openat(2) in capability mode. openat(2) is now permitted in capability mode, subject to the constraint that the relative path must not "escape" the FD that the lookup is being conducted relative to. In capability mode, a lookup that attempts to escape that directory (e.g. via an absolute path or "..") fails with EPERM; behaviour outside capability mode is unchanged. 
openat(2) also now wraps the resulting FD with a capability if the directory FD was a capability. The rights of the new capability are identical to those of the original. Affected files ... .. //depot/projects/trustedbsd/capabilities/src/sys/amd64/conf/CAPABILITIES#2 edit .. //depot/projects/trustedbsd/capabilities/src/sys/kern/capabilities.conf#22 edit .. //depot/projects/trustedbsd/capabilities/src/sys/kern/init_sysent.c#42 edit .. //depot/projects/trustedbsd/capabilities/src/sys/kern/kern_descrip.c#30 edit .. //depot/projects/trustedbsd/capabilities/src/sys/kern/sys_capability.c#27 edit .. //depot/projects/trustedbsd/capabilities/src/sys/kern/vfs_lookup.c#13 edit .. //depot/projects/trustedbsd/capabilities/src/sys/kern/vfs_syscalls.c#18 edit .. //depot/projects/trustedbsd/capabilities/src/sys/sys/capability.h#26 edit .. //depot/projects/trustedbsd/capabilities/src/sys/sys/filedesc.h#4 edit .. //depot/projects/trustedbsd/capabilities/src/sys/sys/namei.h#6 edit Differences ... ==== //depot/projects/trustedbsd/capabilities/src/sys/amd64/conf/CAPABILITIES#2 (text+ko) ==== @@ -1,4 +1,9 @@ include GENERIC + options CAPABILITIES options PROCDESC options KDTRACE_HOOKS +options WITNESS +options KDB +options DDB + ==== //depot/projects/trustedbsd/capabilities/src/sys/kern/capabilities.conf#22 (text+ko) ==== @@ -38,7 +38,7 @@ ## - sys_exit(2), abort2(2) and close(2) are very important. ## - Sorted alphabetically, please keep it that way. ## -## $P4: //depot/projects/trustedbsd/capabilities/src/sys/kern/capabilities.conf#21 $ +## $P4: //depot/projects/trustedbsd/capabilities/src/sys/kern/capabilities.conf#22 $ ## ## @@ -453,6 +453,12 @@ olio_listio ## +## Allow openat(2), which we have constrained to prevent accessing files +## which are not "under" the directory FD given to the syscall. +## +openat + +## ## Allow poll(2), which will be scoped by capability rights. ## ## XXXRW: Perhaps we don't need the OpenBSD version? 
==== //depot/projects/trustedbsd/capabilities/src/sys/kern/init_sysent.c#42 (text+ko) ==== @@ -533,7 +533,7 @@ { AS(mkdirat_args), (sy_call_t *)mkdirat, AUE_MKDIRAT, NULL, 0, 0, 0 }, /* 496 = mkdirat */ { AS(mkfifoat_args), (sy_call_t *)mkfifoat, AUE_MKFIFOAT, NULL, 0, 0, 0 }, /* 497 = mkfifoat */ { AS(mknodat_args), (sy_call_t *)mknodat, AUE_MKNODAT, NULL, 0, 0, 0 }, /* 498 = mknodat */ - { AS(openat_args), (sy_call_t *)openat, AUE_OPENAT_RWTC, NULL, 0, 0, 0 }, /* 499 = openat */ + { AS(openat_args), (sy_call_t *)openat, AUE_OPENAT_RWTC, NULL, 0, 0, SYF_CAPENABLED }, /* 499 = openat */ { AS(readlinkat_args), (sy_call_t *)readlinkat, AUE_READLINKAT, NULL, 0, 0, 0 }, /* 500 = readlinkat */ { AS(renameat_args), (sy_call_t *)renameat, AUE_RENAMEAT, NULL, 0, 0, 0 }, /* 501 = renameat */ { AS(symlinkat_args), (sy_call_t *)symlinkat, AUE_SYMLINKAT, NULL, 0, 0, 0 }, /* 502 = symlinkat */ ==== //depot/projects/trustedbsd/capabilities/src/sys/kern/kern_descrip.c#30 (text+ko) ==== @@ -1543,13 +1543,31 @@ int falloc(struct thread *td, struct file **resultfp, int *resultfd) { + return _falloc(td, resultfp, resultfd, 1); +} + +/* + * Create a new open file structure and, optionally, allocate a file decriptor + * for the process that refers to it. + */ +int +_falloc(struct thread *td, struct file **resultfp, int *resultfd, + int addfd) +{ struct proc *p = td->td_proc; struct file *fp; - int error, i; + int error, i = -1; int maxuserfiles = maxfiles - (maxfiles / 20); static struct timeval lastfail; static int curfail; + /* + * Cowardly refuse to create a referenceless file: if we're not adding + * the file to the process descriptor array, then the calling code + * MUST expect a pointer to be returned. 
+ */ + if (!addfd && !resultfp) return (error = EINVAL); + fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO); if ((openfiles >= maxuserfiles && priv_check(td, PRIV_MAXFILES) != 0) || @@ -1561,14 +1579,16 @@ uma_zfree(file_zone, fp); return (ENFILE); } - atomic_add_int(&openfiles, 1); + if (addfd) + atomic_add_int(&openfiles, 1); /* + * If addfd: * If the process has file descriptor zero open, add the new file * descriptor to the list of open files at that point, otherwise * put it at the front of the list of open files. */ - refcount_init(&fp->f_count, 1); + refcount_init(&fp->f_count, (addfd > 0)); if (resultfp) fhold(fp); fp->f_cred = crhold(td->td_ucred); @@ -1577,16 +1597,20 @@ fp->f_vnode = NULL; LIST_INIT(&fp->f_caps); fp->f_capcount = 0; - FILEDESC_XLOCK(p->p_fd); - if ((error = fdalloc(td, 0, &i))) { + + if (addfd) { + FILEDESC_XLOCK(p->p_fd); + if ((error = fdalloc(td, 0, &i))) { + FILEDESC_XUNLOCK(p->p_fd); + fdrop(fp, td); + if (resultfp) + fdrop(fp, td); + return (error); + } + p->p_fd->fd_ofiles[i] = fp; FILEDESC_XUNLOCK(p->p_fd); - fdrop(fp, td); - if (resultfp) - fdrop(fp, td); - return (error); } - p->p_fd->fd_ofiles[i] = fp; - FILEDESC_XUNLOCK(p->p_fd); + if (resultfp) *resultfp = fp; if (resultfd) ==== //depot/projects/trustedbsd/capabilities/src/sys/kern/sys_capability.c#27 (text+ko) ==== @@ -50,7 +50,7 @@ #include "opt_capabilities.h" #include -__FBSDID("$P4: //depot/projects/trustedbsd/capabilities/src/sys/kern/sys_capability.c#26 $"); +__FBSDID("$P4: //depot/projects/trustedbsd/capabilities/src/sys/kern/sys_capability.c#27 $"); #include #include @@ -278,28 +278,52 @@ int cap_new(struct thread *td, struct cap_new_args *uap) { - struct capability *c, *c_old; - struct file *fp, *fp_cap, *fp_object; - int error, fd_cap; + int error, capfd; + int fd = uap->fd; + struct file *fp, *cap; + cap_rights_t rights = uap->rights; - AUDIT_ARG_FD(uap->fd); - AUDIT_ARG_RIGHTS(uap->rights); - if ((uap->rights | CAP_MASK_VALID) != CAP_MASK_VALID) - return (EINVAL); 
+ AUDIT_ARG_FD(fd); + AUDIT_ARG_RIGHTS(rights); - c = uma_zalloc(capability_zone, M_WAITOK | M_ZERO); - /* * We always allow creating a capability referencing an existing * descriptor or capability, even if it's not of much use to the * application. */ - error = fget(td, uap->fd, 0, &fp); - if (error) - goto fail; + error = fget(td, fd, 0, &fp); + if (error) return (error); AUDIT_ARG_FILE(td->td_proc, fp); + error = kern_capwrap(td, fp, rights, &cap, &capfd); + + /* + * Release our reference to the file (another one has been taken for + * the capability's sake if necessary). + */ + fdrop(fp, td); + + return error; +} + + +/* + * Create a capability to wrap around an existing file. + */ +int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, + struct file **cap, int *capfd) +{ + struct capability *c, *c_old; + struct file *fp_object; + int error; + + if ((rights | CAP_MASK_VALID) != CAP_MASK_VALID) + return (EINVAL); + + c = uma_zalloc(capability_zone, M_WAITOK | M_ZERO); + + /* * If a new capability is being derived from an existing capability, * then the new capability rights must be a subset of the existing @@ -307,18 +331,18 @@ */ if (fp->f_type == DTYPE_CAPABILITY) { c_old = fp->f_data; - if ((c_old->cap_rights | uap->rights) != c_old->cap_rights) { + if ((c_old->cap_rights | rights) != c_old->cap_rights) { error = ENOTCAPABLE; - goto fail2; + goto fail; } } /* * Allocate a new file descriptor to hang the capability off. 
*/ - error = falloc(td, &fp_cap, &fd_cap); + error = falloc(td, cap, capfd); if (error) - goto fail2; + goto fail; /* * Rather than nesting capabilities, directly reference the object an @@ -332,10 +356,10 @@ else fp_object = fp; fhold(fp_object); - c->cap_rights = uap->rights; + c->cap_rights = rights; c->cap_object = fp_object; - c->cap_file = fp_cap; - finit(fp_cap, fp->f_flag, DTYPE_CAPABILITY, c, &capability_ops); + c->cap_file = *cap; + finit(*cap, fp->f_flag, DTYPE_CAPABILITY, c, &capability_ops); /* * Add this capability to the per-file list of referencing @@ -345,13 +369,15 @@ LIST_INSERT_HEAD(&fp_object->f_caps, c, cap_filelist); fp_object->f_capcount++; mtx_pool_unlock(mtxpool_sleep, fp_object); - td->td_retval[0] = fd_cap; - fdrop(fp, td); - fdrop(fp_cap, td); + td->td_retval[0] = *capfd; + + /* + * Release our private reference (the proc filedesc still has one). + */ + fdrop(*cap, td); + return (0); -fail2: - fdrop(fp, td); fail: uma_zfree(capability_zone, c); return (error); ==== //depot/projects/trustedbsd/capabilities/src/sys/kern/vfs_lookup.c#13 (text+ko) ==== @@ -140,9 +140,11 @@ int vfslocked; #ifdef KDB - if (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) { + if ((td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) + && (ndp->ni_dirfd == AT_FDCWD)) + { printf("namei: pid %d proc %s performed namei in capability " - "mode\n", p->p_pid, p->p_comm); + "mode (and it's not *at())\n", p->p_pid, p->p_comm); kdb_backtrace(); } #endif @@ -478,6 +480,7 @@ int dvfslocked; /* VFS Giant state for parent */ int tvfslocked; int lkflags_save; + int insidebasedir = 0; /* we're under the *at() base */ /* * Setup: break out flag bits into variables. 
@@ -504,6 +507,11 @@ cnp->cn_lkflags = LK_SHARED; else cnp->cn_lkflags = LK_EXCLUSIVE; + + /* we do not allow absolute lookups in capability mode */ + if(ndp->ni_basedir && (ndp->ni_startdir == ndp->ni_rootdir)) + return (error = EPERM); + dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; vn_lock(dp, @@ -572,6 +580,11 @@ goto bad; } + + /* Check to see if we're at the *at directory */ + if(dp == ndp->ni_basedir) insidebasedir = 1; + + /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, @@ -626,6 +639,13 @@ goto bad; } for (;;) { + /* attempting to wander out of the *at root */ + if(dp == ndp->ni_basedir) + { + error = EPERM; + goto bad; + } + for (pr = cnp->cn_cred->cr_prison; pr != NULL; pr = pr->pr_parent) if (dp == pr->pr_root) @@ -886,6 +906,16 @@ VOP_UNLOCK(dp, 0); success: /* + * If we're in capability mode and the syscall was *at(), ensure + * that the *at() base was part of the path + */ + if(ndp->ni_basedir && !insidebasedir) + { + error = EPERM; + goto bad; + } + + /* * Because of lookup_shared we may have the vnode shared locked, but * the caller may want it to be exclusively locked. 
*/ ==== //depot/projects/trustedbsd/capabilities/src/sys/kern/vfs_syscalls.c#18 (text+ko) ==== @@ -1083,6 +1083,8 @@ return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); } + + int kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, int flags, int mode) @@ -1090,7 +1092,7 @@ struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; struct file *fp; - struct vnode *vp; + struct vnode *vp, *base = 0; struct vattr vat; struct mount *mp; int cmode; @@ -1099,6 +1101,7 @@ struct flock lf; struct nameidata nd; int vfslocked; + cap_rights_t baserights = -1; AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); @@ -1115,16 +1118,69 @@ else flags = FFLAGS(flags); - error = falloc(td, &nfp, &indx); + /* get capability info of base FD */ + if (fd >= 0) + { + struct file *f; + const cap_rights_t LOOKUP_RIGHTS = CAP_LOOKUP | CAP_ATBASE; + + FILEDESC_SLOCK(fdp); + + error = fgetcap(td, fd, &f); + if (error == 0) { + /* FD is a capability; get rights and unwrap */ + struct file *real_fp = NULL; + + baserights = cap_rights(f); + error = cap_fextract(f, LOOKUP_RIGHTS, &real_fp); + + /* hold the underlying file, not the capability */ + if (error == 0) fhold(real_fp); + fdrop(f, td); + + f = real_fp; + } + else if (error == EINVAL) + /* not a capability; get the real file pointer */ + error = fget(td, fd, LOOKUP_RIGHTS, &f); + + + + /* if in capability mode, get base vnode (for namei) */ + if (!error && (td->td_ucred->cr_flags & CRED_FLAG_CAPMODE)) { + base = f->f_vnode; + vref(base); + } + + + /* don't need to hold the base any more */ + if (f != NULL) fdrop(f, td); + + if (error) { + FILEDESC_SUNLOCK(fdp); + return (error); + } + else + FILEDESC_SUNLOCK(fdp); + } + + + /* + * allocate the file descriptor, but only add it to the descriptor + * array if fd isn't a capability (in which case we'll add the + * capability instead, later) + */ + error = _falloc(td, &nfp, &indx, (baserights == -1)); if (error) return (error); - /* An extra reference on `nfp' has 
been held for us by falloc(). */ + + /* An extra reference on `nfp' has been held for us by _falloc(). */ fp = nfp; /* Set the flags early so the finit in devfs can pick them up. */ fp->f_flag = flags & FMASK; cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; - NDINIT_AT(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, path, fd, - td); + NDINIT_ATBASE(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | MPSAFE, pathseg, + path, fd, base, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, fp); if (error) { @@ -1133,11 +1189,8 @@ * wonderous happened deep below and we just pass it up * pretending we know what we do. */ - if (error == ENXIO && fp->f_ops != &badfileops) { - fdrop(fp, td); - td->td_retval[0] = indx; - return (0); - } + if (error == ENXIO && fp->f_ops != &badfileops) + goto success; /* * handle special fdopen() case. bleh. dupfdopen() is @@ -1147,15 +1200,14 @@ if ((error == ENODEV || error == ENXIO) && td->td_dupfd >= 0 && /* XXX from fdopen */ (error = - dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) { - td->td_retval[0] = indx; - fdrop(fp, td); - return (0); - } + dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) + goto success; + /* * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. */ + if (base) vrele(base); fdclose(fdp, fp, indx, td); fdrop(fp, td); @@ -1213,15 +1265,28 @@ goto bad; } VFS_UNLOCK_GIANT(vfslocked); + +success: + if (baserights != -1) { + /* wrap the result in a capability */ + struct file *cap; + + error = kern_capwrap(td, fp, baserights, &cap, &indx); + if (error) goto bad_unlocked; + } + /* * Release our private reference, leaving the one associated with * the descriptor table intact. 
*/ + if (base) vrele(base); fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: VFS_UNLOCK_GIANT(vfslocked); +bad_unlocked: + if (base) vrele(base); fdclose(fdp, fp, indx, td); fdrop(fp, td); return (error); ==== //depot/projects/trustedbsd/capabilities/src/sys/sys/capability.h#26 (text+ko) ==== @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $P4: //depot/projects/trustedbsd/capabilities/src/sys/sys/capability.h#25 $ + * $P4: //depot/projects/trustedbsd/capabilities/src/sys/sys/capability.h#26 $ */ /* @@ -96,7 +96,8 @@ #define CAP_TTYHOOK 0x0001000000000000ULL /* register tty hook */ #define CAP_FCHDIR 0x0002000000000000ULL /* fchdir(2) */ #define CAP_FSCK 0x0004000000000000ULL /* sysctl_ffs_fsck */ -#define CAP_MASK_VALID 0x0007ffffffffffffULL +#define CAP_ATBASE 0x0008000000000000ULL /* openat(2), etc. */ +#define CAP_MASK_VALID 0x000fffffffffffffULL /* * Notes: @@ -138,6 +139,13 @@ #ifdef _KERNEL struct file; +struct thread; + +/* + * Create a capability to wrap a file object. 
+ */ +int kern_capwrap(struct thread *td, struct file *fp, cap_rights_t rights, + struct file **cap, int *capfd); /* * Given a file descriptor that may be a capability, check the requested ==== //depot/projects/trustedbsd/capabilities/src/sys/sys/filedesc.h#4 (text+ko) ==== @@ -112,6 +112,8 @@ int dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error); int falloc(struct thread *td, struct file **resultfp, int *resultfd); +int _falloc(struct thread *td, struct file **resultfp, int *resultfd, + int addfd); int fdalloc(struct thread *td, int minfd, int *result); int fdavail(struct thread *td, int n); int fdcheckstd(struct thread *td); ==== //depot/projects/trustedbsd/capabilities/src/sys/sys/namei.h#6 (text+ko) ==== @@ -70,6 +70,7 @@ struct vnode *ni_rootdir; /* logical root directory */ struct vnode *ni_topdir; /* logical top directory */ int ni_dirfd; /* starting directory for *at functions */ + struct vnode *ni_basedir; /* root for capability-mode *at */ /* * Results: returned from/manipulated by lookup */ @@ -151,11 +152,13 @@ * Initialization of a nameidata structure. 
*/ #define NDINIT(ndp, op, flags, segflg, namep, td) \ - NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, td) -#define NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td) \ - NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, td) -#define NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td) \ - NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, td) + NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, NULL, NULL, td) +#define NDINIT_AT(ndp, op, flags, segflg, namep, dirfd, td) \ + NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, NULL, td) +#define NDINIT_ATBASE(ndp, op, flags, segflg, namep, dirfd, base, td) \ + NDINIT_ALL(ndp, op, flags, segflg, namep, dirfd, NULL, base, td) +#define NDINIT_ATVP(ndp, op, flags, segflg, namep, vp, td) \ + NDINIT_ALL(ndp, op, flags, segflg, namep, AT_FDCWD, vp, NULL, td) static __inline void NDINIT_ALL(struct nameidata *ndp, @@ -164,6 +167,7 @@ const char *namep, int dirfd, struct vnode *startdir, + struct vnode *basedir, struct thread *td) { ndp->ni_cnd.cn_nameiop = op; @@ -172,6 +176,7 @@ ndp->ni_dirp = namep; ndp->ni_dirfd = dirfd; ndp->ni_startdir = startdir; + ndp->ni_basedir = basedir; ndp->ni_cnd.cn_thread = td; }