From owner-svn-src-stable@FreeBSD.ORG Mon May 28 22:58:14 2012 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 504B2106564A; Mon, 28 May 2012 22:58:14 +0000 (UTC) (envelope-from alc@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 395488FC0C; Mon, 28 May 2012 22:58:14 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q4SMwEih038566; Mon, 28 May 2012 22:58:14 GMT (envelope-from alc@svn.freebsd.org) Received: (from alc@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q4SMwDaj038561; Mon, 28 May 2012 22:58:13 GMT (envelope-from alc@svn.freebsd.org) Message-Id: <201205282258.q4SMwDaj038561@svn.freebsd.org> From: Alan Cox Date: Mon, 28 May 2012 22:58:13 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r236208 - in stable/9/sys: fs/tmpfs kern X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 28 May 2012 22:58:14 -0000 Author: alc Date: Mon May 28 22:58:13 2012 New Revision: 236208 URL: http://svn.freebsd.org/changeset/base/236208 Log: MFC r229821 Correct an error of omission in the implementation of the truncation operation on POSIX shared memory objects and tmpfs. Previously, neither of these modules correctly handled the case in which the new size of the object or file was not a multiple of the page size. Specifically, they did not handle partial page truncation of data stored on swap. As a result, stale data might later be returned to an application. Interestingly, a data inconsistency was less likely to occur under tmpfs than POSIX shared memory objects. The reason being that a different mistake by the tmpfs truncation operation helped avoid a data inconsistency. If the data was still resident in memory in a PG_CACHED page, then the tmpfs truncation operation would reactivate that page, zero the truncated portion, and leave the page pinned in memory. More precisely, the benevolent error was that the truncation operation didn't add the reactivated page to any of the paging queues, effectively pinning the page. This page would remain pinned until the file was destroyed or the page was read or written. With this change, the page is now added to the inactive queue. MFC r230180 When tmpfs_write() resets an extended file to its original size after an error, we want tmpfs_reg_resize() to ignore I/O errors and unconditionally update the file's size. Modified: stable/9/sys/fs/tmpfs/tmpfs.h stable/9/sys/fs/tmpfs/tmpfs_subr.c stable/9/sys/fs/tmpfs/tmpfs_vnops.c stable/9/sys/kern/uipc_shm.c Directory Properties: stable/9/sys/ (props changed) stable/9/sys/fs/ (props changed) Modified: stable/9/sys/fs/tmpfs/tmpfs.h ============================================================================== --- stable/9/sys/fs/tmpfs/tmpfs.h Mon May 28 22:30:36 2012 (r236207) +++ stable/9/sys/fs/tmpfs/tmpfs.h Mon May 28 22:58:13 2012 (r236208) @@ -435,7 +435,7 @@ struct tmpfs_dirent * tmpfs_dir_lookupby int tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, off_t *); int tmpfs_dir_whiteout_add(struct vnode *, struct componentname *); void tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *); -int tmpfs_reg_resize(struct vnode *, off_t); +int tmpfs_reg_resize(struct vnode *, off_t, boolean_t); int tmpfs_chflags(struct vnode *, int, struct ucred *, struct thread *); int tmpfs_chmod(struct vnode *, mode_t, struct ucred *, struct thread *); int tmpfs_chown(struct vnode *, uid_t, gid_t, struct ucred *, Modified: stable/9/sys/fs/tmpfs/tmpfs_subr.c ============================================================================== --- stable/9/sys/fs/tmpfs/tmpfs_subr.c Mon May 28 22:30:36 2012 (r236207) +++ stable/9/sys/fs/tmpfs/tmpfs_subr.c Mon May 28 22:58:13 2012 (r236208) @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -954,15 +955,15 @@ tmpfs_dir_whiteout_remove(struct vnode * * Returns zero on success or an appropriate error code on failure. */ int -tmpfs_reg_resize(struct vnode *vp, off_t newsize) +tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr) { struct tmpfs_mount *tmp; struct tmpfs_node *node; vm_object_t uobj; - vm_page_t m; - vm_pindex_t newpages, oldpages; + vm_page_t m, ma[1]; + vm_pindex_t idx, newpages, oldpages; off_t oldsize; - size_t zerolen; + int base, rv; MPASS(vp->v_type == VREG); MPASS(newsize >= 0); @@ -985,15 +986,61 @@ tmpfs_reg_resize(struct vnode *vp, off_t tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0) return (ENOSPC); - TMPFS_LOCK(tmp); - tmp->tm_pages_used += (newpages - oldpages); - TMPFS_UNLOCK(tmp); - - node->tn_size = newsize; - vnode_pager_setsize(vp, newsize); VM_OBJECT_LOCK(uobj); if (newsize < oldsize) { /* + * Zero the truncated part of the last page. + */ + base = newsize & PAGE_MASK; + if (base != 0) { + idx = OFF_TO_IDX(newsize); +retry: + m = vm_page_lookup(uobj, idx); + if (m != NULL) { + if ((m->oflags & VPO_BUSY) != 0 || + m->busy != 0) { + vm_page_sleep(m, "tmfssz"); + goto retry; + } + } else if (vm_pager_has_page(uobj, idx, NULL, NULL)) { + m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL); + if (m == NULL) { + VM_OBJECT_UNLOCK(uobj); + VM_WAIT; + VM_OBJECT_LOCK(uobj); + goto retry; + } else if (m->valid != VM_PAGE_BITS_ALL) { + ma[0] = m; + rv = vm_pager_get_pages(uobj, ma, 1, 0); + m = vm_page_lookup(uobj, idx); + } else + /* A cached page was reactivated. */ + rv = VM_PAGER_OK; + vm_page_lock(m); + if (rv == VM_PAGER_OK) { + vm_page_deactivate(m); + vm_page_unlock(m); + vm_page_wakeup(m); + } else { + vm_page_free(m); + vm_page_unlock(m); + if (ignerr) + m = NULL; + else { + VM_OBJECT_UNLOCK(uobj); + return (EIO); + } + } + } + if (m != NULL) { + pmap_zero_page_area(m, base, PAGE_SIZE - base); + MPASS(m->valid == VM_PAGE_BITS_ALL); + vm_page_dirty(m); + vm_pager_page_unswapped(m); + } + } + + /* * Release any swap space and free any whole pages. */ if (newpages < oldpages) { @@ -1001,19 +1048,16 @@ tmpfs_reg_resize(struct vnode *vp, off_t newpages); vm_object_page_remove(uobj, newpages, 0, 0); } - - /* - * Zero the truncated part of the last page. - */ - zerolen = round_page(newsize) - newsize; - if (zerolen > 0) { - m = vm_page_grab(uobj, OFF_TO_IDX(newsize), - VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_RETRY); - pmap_zero_page_area(m, PAGE_SIZE - zerolen, zerolen); - } } uobj->size = newpages; VM_OBJECT_UNLOCK(uobj); + + TMPFS_LOCK(tmp); + tmp->tm_pages_used += (newpages - oldpages); + TMPFS_UNLOCK(tmp); + + node->tn_size = newsize; + vnode_pager_setsize(vp, newsize); return (0); } @@ -1384,7 +1428,7 @@ tmpfs_truncate(struct vnode *vp, off_t l if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) return (EFBIG); - error = tmpfs_reg_resize(vp, length); + error = tmpfs_reg_resize(vp, length, FALSE); if (error == 0) { node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; } Modified: stable/9/sys/fs/tmpfs/tmpfs_vnops.c ============================================================================== --- stable/9/sys/fs/tmpfs/tmpfs_vnops.c Mon May 28 22:30:36 2012 (r236207) +++ stable/9/sys/fs/tmpfs/tmpfs_vnops.c Mon May 28 22:58:13 2012 (r236208) @@ -755,7 +755,8 @@ tmpfs_write(struct vop_write_args *v) extended = uio->uio_offset + uio->uio_resid > node->tn_size; if (extended) { - error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid); + error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid, + FALSE); if (error != 0) goto out; } @@ -781,7 +782,7 @@ tmpfs_write(struct vop_write_args *v) } if (error != 0) - (void)tmpfs_reg_resize(vp, oldsize); + (void)tmpfs_reg_resize(vp, oldsize, TRUE); out: MPASS(IMPLIES(error == 0, uio->uio_resid == 0)); Modified: stable/9/sys/kern/uipc_shm.c ============================================================================== --- stable/9/sys/kern/uipc_shm.c Mon May 28 22:30:36 2012 (r236207) +++ stable/9/sys/kern/uipc_shm.c Mon May 28 22:58:13 2012 (r236208) @@ -39,13 +39,6 @@ * * (3) Resource limits? Does this need its own resource limits or are the * existing limits in mmap(2) sufficient? - * - * (4) Partial page truncation. vnode_pager_setsize() will zero any parts - * of a partially mapped page as a result of ftruncate(2)/truncate(2). - * We can do the same (with the same pmap evil), but do we need to - * worry about the bits on disk if the page is swapped out or will the - * swapper zero the parts of a page that are invalid if the page is - * swapped back in for us? */ #include @@ -84,6 +77,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -251,9 +245,10 @@ static int shm_dotruncate(struct shmfd *shmfd, off_t length) { vm_object_t object; - vm_page_t m; - vm_pindex_t nobjsize; + vm_page_t m, ma[1]; + vm_pindex_t idx, nobjsize; vm_ooffset_t delta; + int base, rv; object = shmfd->shm_object; VM_OBJECT_LOCK(object); @@ -265,6 +260,56 @@ shm_dotruncate(struct shmfd *shmfd, off_ /* Are we shrinking? If so, trim the end. */ if (length < shmfd->shm_size) { + + /* + * Zero the truncated part of the last page. + */ + base = length & PAGE_MASK; + if (base != 0) { + idx = OFF_TO_IDX(length); +retry: + m = vm_page_lookup(object, idx); + if (m != NULL) { + if ((m->oflags & VPO_BUSY) != 0 || + m->busy != 0) { + vm_page_sleep(m, "shmtrc"); + goto retry; + } + } else if (vm_pager_has_page(object, idx, NULL, NULL)) { + m = vm_page_alloc(object, idx, VM_ALLOC_NORMAL); + if (m == NULL) { + VM_OBJECT_UNLOCK(object); + VM_WAIT; + VM_OBJECT_LOCK(object); + goto retry; + } else if (m->valid != VM_PAGE_BITS_ALL) { + ma[0] = m; + rv = vm_pager_get_pages(object, ma, 1, + 0); + m = vm_page_lookup(object, idx); + } else + /* A cached page was reactivated. */ + rv = VM_PAGER_OK; + vm_page_lock(m); + if (rv == VM_PAGER_OK) { + vm_page_deactivate(m); + vm_page_unlock(m); + vm_page_wakeup(m); + } else { + vm_page_free(m); + vm_page_unlock(m); + VM_OBJECT_UNLOCK(object); + return (EIO); + } + } + if (m != NULL) { + pmap_zero_page_area(m, base, PAGE_SIZE - base); + KASSERT(m->valid == VM_PAGE_BITS_ALL, + ("shm_dotruncate: page %p is invalid", m)); + vm_page_dirty(m); + vm_pager_page_unswapped(m); + } + } delta = ptoa(object->size - nobjsize); /* Toss in memory pages. */ @@ -279,45 +324,7 @@ shm_dotruncate(struct shmfd *shmfd, off_ /* Free the swap accounted for shm */ swap_release_by_cred(delta, object->cred); object->charge -= delta; - - /* - * If the last page is partially mapped, then zero out - * the garbage at the end of the page. See comments - * in vnode_pager_setsize() for more details. - * - * XXXJHB: This handles in memory pages, but what about - * a page swapped out to disk? - */ - if ((length & PAGE_MASK) && - (m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL && - m->valid != 0) { - int base = (int)length & PAGE_MASK; - int size = PAGE_SIZE - base; - - pmap_zero_page_area(m, base, size); - - /* - * Update the valid bits to reflect the blocks that - * have been zeroed. Some of these valid bits may - * have already been set. - */ - vm_page_set_valid(m, base, size); - - /* - * Round "base" to the next block boundary so that the - * dirty bit for a partially zeroed block is not - * cleared. - */ - base = roundup2(base, DEV_BSIZE); - - vm_page_clear_dirty(m, base, PAGE_SIZE - base); - } else if ((length & PAGE_MASK) && - __predict_false(object->cache != NULL)) { - vm_page_cache_free(object, OFF_TO_IDX(length), - nobjsize); - } } else { - /* Attempt to reserve the swap */ delta = ptoa(nobjsize - object->size); if (!swap_reserve_by_cred(delta, object->cred)) {