From owner-svn-src-all@FreeBSD.ORG Tue Oct 14 21:19:25 2014 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id B506F605; Tue, 14 Oct 2014 21:19:25 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 96F0084F; Tue, 14 Oct 2014 21:19:25 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id s9ELJPwx064295; Tue, 14 Oct 2014 21:19:25 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id s9ELJOuR064290; Tue, 14 Oct 2014 21:19:24 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <201410142119.s9ELJOuR064290@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Tue, 14 Oct 2014 21:19:24 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r273109 - in stable/10/sys: kern sys X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 14 Oct 2014 21:19:25 -0000 Author: mjg Date: Tue Oct 14 21:19:23 2014 New Revision: 273109 URL: https://svnweb.freebsd.org/changeset/base/273109 Log: MFC r269023,r272503,r272505,r272523,r272567,r272569,r272574 Prepare fget_unlocked for reading fd table only once. Some capsicum functions accept fdp + fd and lookup fde based on that. Add variants which accept fde. =============================== Add sequence counters with memory barriers. Current implementation is somewhat simplistic and hackish, will be improved later after possible memory barrier overhaul. =============================== Plug capability races. fp and appropriate capability lookups were not atomic, which could result in improper capabilities being checked. This could result either in protection bypass or in a spurious ENOTCAPABLE. Make fp + capability check atomic with the help of sequence counters. =============================== Put and #ifdef _KERNEL around the #include for opt_capsicum.h to hopefully allow the build to finish after r272505. =============================== filedesc: fix up breakage introduced in 272505 Include sequence counter supports incoditionally [1]. This fixes reprted build problems with e.g. nvidia driver due to missing opt_capsicum.h. Replace fishy looking sizeof with offsetof. Make fde_seq the last member in order to simplify calculations. =============================== Keep struct filedescent comments within 80-char limit. =============================== seq_t needs to be visible to userspace Added: stable/10/sys/sys/seq.h - copied, changed from r272503, head/sys/sys/seq.h Modified: stable/10/sys/kern/kern_descrip.c stable/10/sys/kern/sys_capability.c stable/10/sys/sys/capability.h stable/10/sys/sys/filedesc.h Directory Properties: stable/10/ (props changed) Modified: stable/10/sys/kern/kern_descrip.c ============================================================================== --- stable/10/sys/kern/kern_descrip.c Tue Oct 14 21:02:33 2014 (r273108) +++ stable/10/sys/kern/kern_descrip.c Tue Oct 14 21:19:23 2014 (r273109) @@ -304,11 +304,18 @@ _fdfree(struct filedesc *fdp, int fd, in struct filedescent *fde; fde = &fdp->fd_ofiles[fd]; +#ifdef CAPABILITIES + if (!last) + seq_write_begin(&fde->fde_seq); +#endif filecaps_free(&fde->fde_caps); if (last) return; - bzero(fde, sizeof(*fde)); + bzero(fde, fde_change_size); fdunused(fdp, fd); +#ifdef CAPABILITIES + seq_write_end(&fde->fde_seq); +#endif } static inline void @@ -899,13 +906,19 @@ do_dup(struct thread *td, int flags, int /* * Duplicate the source descriptor. */ +#ifdef CAPABILITIES + seq_write_begin(&newfde->fde_seq); +#endif filecaps_free(&newfde->fde_caps); - *newfde = *oldfde; + memcpy(newfde, oldfde, fde_change_size); filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); if ((flags & DUP_CLOEXEC) != 0) newfde->fde_flags = oldfde->fde_flags | UF_EXCLOSE; else newfde->fde_flags = oldfde->fde_flags & ~UF_EXCLOSE; +#ifdef CAPABILITIES + seq_write_end(&newfde->fde_seq); +#endif *retval = new; if (delfp != NULL) { @@ -1804,6 +1817,9 @@ finstall(struct thread *td, struct file } fhold(fp); fde = &fdp->fd_ofiles[*fd]; +#ifdef CAPABILITIES + seq_write_begin(&fde->fde_seq); +#endif fde->fde_file = fp; if ((flags & O_CLOEXEC) != 0) fde->fde_flags |= UF_EXCLOSE; @@ -1811,6 +1827,9 @@ finstall(struct thread *td, struct file filecaps_move(fcaps, &fde->fde_caps); else filecaps_fill(&fde->fde_caps); +#ifdef CAPABILITIES + seq_write_end(&fde->fde_seq); +#endif FILEDESC_XUNLOCK(fdp); return (0); } @@ -2336,9 +2355,13 @@ int fget_unlocked(struct filedesc *fdp, int fd, cap_rights_t *needrightsp, int needfcntl, struct file **fpp, cap_rights_t *haverightsp) { +#ifdef CAPABILITIES + struct filedescent fde; +#endif struct file *fp; u_int count; #ifdef CAPABILITIES + seq_t seq; cap_rights_t haverights; int error; #endif @@ -2358,17 +2381,27 @@ fget_unlocked(struct filedesc *fdp, int * due to preemption. */ for (;;) { +#ifdef CAPABILITIES + seq = seq_read(fd_seq(fdp, fd)); + fde = fdp->fd_ofiles[fd]; + if (!seq_consistent(fd_seq(fdp, fd), seq)) { + cpu_spinwait(); + continue; + } + fp = fde.fde_file; +#else fp = fdp->fd_ofiles[fd].fde_file; +#endif if (fp == NULL) return (EBADF); #ifdef CAPABILITIES - haverights = *cap_rights(fdp, fd); + haverights = *cap_rights_fde(&fde); if (needrightsp != NULL) { error = cap_check(&haverights, needrightsp); if (error != 0) return (error); if (cap_rights_is_set(needrightsp, CAP_FCNTL)) { - error = cap_fcntl_check(fdp, fd, needfcntl); + error = cap_fcntl_check_fde(&fde, needfcntl); if (error != 0) return (error); } @@ -2383,7 +2416,11 @@ fget_unlocked(struct filedesc *fdp, int */ if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1) continue; +#ifdef CAPABILITIES + if (seq_consistent_nomb(fd_seq(fdp, fd), seq)) +#else if (fp == fdp->fd_ofiles[fd].fde_file) +#endif break; fdrop(fp, curthread); } @@ -2740,6 +2777,7 @@ int dupfdopen(struct thread *td, struct filedesc *fdp, int dfd, int mode, int openerror, int *indxp) { + struct filedescent *newfde, *oldfde; struct file *fp; int error, indx; @@ -2783,17 +2821,32 @@ dupfdopen(struct thread *td, struct file return (EACCES); } fhold(fp); - fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; - filecaps_copy(&fdp->fd_ofiles[dfd].fde_caps, - &fdp->fd_ofiles[indx].fde_caps); + newfde = &fdp->fd_ofiles[indx]; + oldfde = &fdp->fd_ofiles[dfd]; +#ifdef CAPABILITIES + seq_write_begin(&newfde->fde_seq); +#endif + memcpy(newfde, oldfde, fde_change_size); + filecaps_copy(&oldfde->fde_caps, &newfde->fde_caps); +#ifdef CAPABILITIES + seq_write_end(&newfde->fde_seq); +#endif break; case ENXIO: /* * Steal away the file pointer from dfd and stuff it into indx. */ - fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; - bzero(&fdp->fd_ofiles[dfd], sizeof(fdp->fd_ofiles[dfd])); + newfde = &fdp->fd_ofiles[indx]; + oldfde = &fdp->fd_ofiles[dfd]; +#ifdef CAPABILITIES + seq_write_begin(&newfde->fde_seq); +#endif + memcpy(newfde, oldfde, fde_change_size); + bzero(oldfde, fde_change_size); fdunused(fdp, dfd); +#ifdef CAPABILITIES + seq_write_end(&newfde->fde_seq); +#endif break; } FILEDESC_XUNLOCK(fdp); Modified: stable/10/sys/kern/sys_capability.c ============================================================================== --- stable/10/sys/kern/sys_capability.c Tue Oct 14 21:02:33 2014 (r273108) +++ stable/10/sys/kern/sys_capability.c Tue Oct 14 21:19:23 2014 (r273109) @@ -199,11 +199,19 @@ cap_rights_to_vmprot(cap_rights_t *havep * any other way, as we want to keep all capability permission evaluation in * this one file. */ + +cap_rights_t * +cap_rights_fde(struct filedescent *fde) +{ + + return (&fde->fde_rights); +} + cap_rights_t * cap_rights(struct filedesc *fdp, int fd) { - return (&fdp->fd_ofiles[fd].fde_rights); + return (cap_rights_fde(&fdp->fd_ofiles[fd])); } /* @@ -486,24 +494,31 @@ out: * Test whether a capability grants the given fcntl command. */ int -cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) +cap_fcntl_check_fde(struct filedescent *fde, int cmd) { uint32_t fcntlcap; - KASSERT(fd >= 0 && fd < fdp->fd_nfiles, - ("%s: invalid fd=%d", __func__, fd)); - fcntlcap = (1 << cmd); KASSERT((CAP_FCNTL_ALL & fcntlcap) != 0, ("Unsupported fcntl=%d.", cmd)); - if ((fdp->fd_ofiles[fd].fde_fcntls & fcntlcap) != 0) + if ((fde->fde_fcntls & fcntlcap) != 0) return (0); return (ENOTCAPABLE); } int +cap_fcntl_check(struct filedesc *fdp, int fd, int cmd) +{ + + KASSERT(fd >= 0 && fd < fdp->fd_nfiles, + ("%s: invalid fd=%d", __func__, fd)); + + return (cap_fcntl_check_fde(&fdp->fd_ofiles[fd], cmd)); +} + +int sys_cap_fcntls_limit(struct thread *td, struct cap_fcntls_limit_args *uap) { struct filedesc *fdp; Modified: stable/10/sys/sys/capability.h ============================================================================== --- stable/10/sys/sys/capability.h Tue Oct 14 21:02:33 2014 (r273108) +++ stable/10/sys/sys/capability.h Tue Oct 14 21:19:23 2014 (r273109) @@ -343,6 +343,7 @@ bool cap_rights_contains(const cap_right #define IN_CAPABILITY_MODE(td) ((td->td_ucred->cr_flags & CRED_FLAG_CAPMODE) != 0) struct filedesc; +struct filedescent; /* * Test whether a capability grants the requested rights. @@ -357,9 +358,11 @@ u_char cap_rights_to_vmprot(cap_rights_t * For the purposes of procstat(1) and similar tools, allow kern_descrip.c to * extract the rights from a capability. */ +cap_rights_t *cap_rights_fde(struct filedescent *fde); cap_rights_t *cap_rights(struct filedesc *fdp, int fd); int cap_ioctl_check(struct filedesc *fdp, int fd, u_long cmd); +int cap_fcntl_check_fde(struct filedescent *fde, int cmd); int cap_fcntl_check(struct filedesc *fdp, int fd, int cmd); #else /* !_KERNEL */ Modified: stable/10/sys/sys/filedesc.h ============================================================================== --- stable/10/sys/sys/filedesc.h Tue Oct 14 21:02:33 2014 (r273108) +++ stable/10/sys/sys/filedesc.h Tue Oct 14 21:19:23 2014 (r273109) @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -50,14 +51,16 @@ struct filecaps { }; struct filedescent { - struct file *fde_file; /* file structure for open file */ - struct filecaps fde_caps; /* per-descriptor rights */ - uint8_t fde_flags; /* per-process open file flags */ + struct file *fde_file; /* file structure for open file */ + struct filecaps fde_caps; /* per-descriptor rights */ + uint8_t fde_flags; /* per-process open file flags */ + seq_t fde_seq; /* keep file and caps in sync */ }; #define fde_rights fde_caps.fc_rights #define fde_fcntls fde_caps.fc_fcntls #define fde_ioctls fde_caps.fc_ioctls #define fde_nioctls fde_caps.fc_nioctls +#define fde_change_size (offsetof(struct filedescent, fde_seq)) /* * This structure is used for the management of descriptors. It may be @@ -82,6 +85,7 @@ struct filedesc { int fd_holdleaderscount; /* block fdfree() for shared close() */ int fd_holdleaderswakeup; /* fdfree() needs wakeup */ }; +#define fd_seq(fdp, fd) (&(fdp)->fd_ofiles[(fd)].fde_seq) /* * Structure to keep track of (process leader, struct fildedesc) tuples. Copied and modified: stable/10/sys/sys/seq.h (from r272503, head/sys/sys/seq.h) ============================================================================== --- head/sys/sys/seq.h Sat Oct 4 08:03:52 2014 (r272503, copy source) +++ stable/10/sys/sys/seq.h Tue Oct 14 21:19:23 2014 (r273109) @@ -29,6 +29,16 @@ #define _SYS_SEQ_H_ #ifdef _KERNEL +#include +#endif +#include + +/* + * seq_t may be included in structs visible to userspace + */ +typedef uint32_t seq_t; + +#ifdef _KERNEL /* * Typical usage: @@ -54,10 +64,7 @@ * foo(lobj); */ -typedef uint32_t seq_t; - /* A hack to get MPASS macro */ -#include #include #include