Date: Mon, 5 Aug 2013 08:55:35 +0000 (UTC)
From: Attilio Rao <attilio@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r253953 - in head/sys: cddl/contrib/opensolaris/uts/common/fs/zfs fs/tmpfs kern vm
Message-ID: <201308050855.r758tZLZ011247@svn.freebsd.org>
Author: attilio
Date: Mon Aug  5 08:55:35 2013
New Revision: 253953
URL: http://svnweb.freebsd.org/changeset/base/253953

Log:
  Revert r253939:
  We cannot busy a page before taking pagefaults.  In fact, doing so can
  deadlock against the vnode lock, as the fault handler tries to vget().
  Other functions currently have the opposite lock ordering; for example,
  vm_object_sync() acquires the vnode lock first and then sleeps on the
  busy mechanism.

  Before this patch is reinserted we need to break this ordering.

  Sponsored by:	EMC / Isilon storage division
  Reported by:	kib

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/fs/tmpfs/tmpfs_vnops.c
  head/sys/kern/imgact_elf.c
  head/sys/kern/kern_exec.c
  head/sys/kern/sys_process.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_glue.c
  head/sys/vm/vm_map.h

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -324,8 +324,7 @@ zfs_ioctl(vnode_t *vp, u_long com, intpt
 }
 
 static vm_page_t
-page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
-    boolean_t alloc)
+page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
 {
         vm_object_t obj;
         vm_page_t pp;
@@ -347,8 +346,6 @@ page_busy(vnode_t *vp, int64_t start, in
                                 continue;
                         }
                 } else if (pp == NULL) {
-                        if (!alloc)
-                                break;
                         pp = vm_page_alloc(obj, OFF_TO_IDX(start),
                             VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
                             VM_ALLOC_NOBUSY);
@@ -359,10 +356,8 @@ page_busy(vnode_t *vp, int64_t start, in
 
                 if (pp != NULL) {
                         ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-                        vm_page_io_start(pp);
-                        if (!alloc)
-                                break;
                         vm_object_pip_add(obj, 1);
+                        vm_page_io_start(pp);
                         pmap_remove_write(pp);
                         vm_page_clear_dirty(pp, off, nbytes);
                 }
@@ -372,12 +367,55 @@ page_busy(vnode_t *vp, int64_t start, in
 }
 
 static void
-page_unbusy(vm_page_t pp, boolean_t unalloc)
+page_unbusy(vm_page_t pp)
 {
 
         vm_page_io_finish(pp);
-        if (unalloc)
-                vm_object_pip_subtract(pp->object, 1);
+        vm_object_pip_subtract(pp->object, 1);
+}
+
+static vm_page_t
+page_hold(vnode_t *vp, int64_t start)
+{
+        vm_object_t obj;
+        vm_page_t pp;
+
+        obj = vp->v_object;
+        zfs_vmobject_assert_wlocked(obj);
+
+        for (;;) {
+                if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
+                    pp->valid) {
+                        if ((pp->oflags & VPO_BUSY) != 0) {
+                                /*
+                                 * Reference the page before unlocking and
+                                 * sleeping so that the page daemon is less
+                                 * likely to reclaim it.
+                                 */
+                                vm_page_reference(pp);
+                                vm_page_sleep(pp, "zfsmwb");
+                                continue;
+                        }
+
+                        ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+                        vm_page_lock(pp);
+                        vm_page_hold(pp);
+                        vm_page_unlock(pp);
+
+                } else
+                        pp = NULL;
+                break;
+        }
+        return (pp);
+}
+
+static void
+page_unhold(vm_page_t pp)
+{
+
+        vm_page_lock(pp);
+        vm_page_unhold(pp);
+        vm_page_unlock(pp);
 }
 
 static caddr_t
@@ -441,8 +479,7 @@ update_pages(vnode_t *vp, int64_t start,
                         zfs_vmobject_wlock(obj);
                         vm_page_undirty(pp);
-                } else if ((pp = page_busy(vp, start, off, nbytes,
-                    TRUE)) != NULL) {
+                } else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
                         zfs_vmobject_wunlock(obj);
 
                         va = zfs_map_page(pp, &sf);
@@ -451,7 +488,7 @@ update_pages(vnode_t *vp, int64_t start,
                         zfs_unmap_page(sf);
 
                         zfs_vmobject_wlock(obj);
-                        page_unbusy(pp, TRUE);
+                        page_unbusy(pp);
                 }
                 len -= nbytes;
                 off = 0;
@@ -561,7 +598,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
                 vm_page_t pp;
                 uint64_t bytes = MIN(PAGESIZE - off, len);
 
-                if (pp = page_busy(vp, start, 0, 0, FALSE)) {
+                if (pp = page_hold(vp, start)) {
                         struct sf_buf *sf;
                         caddr_t va;
 
@@ -570,7 +607,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
                         error = uiomove(va + off, bytes, UIO_READ, uio);
                         zfs_unmap_page(sf);
                         zfs_vmobject_wlock(obj);
-                        page_unbusy(pp, FALSE);
+                        page_unhold(pp);
                 } else {
                         zfs_vmobject_wunlock(obj);
                         error = dmu_read_uio(os, zp->z_id, uio, bytes);

Modified: head/sys/fs/tmpfs/tmpfs_vnops.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_vnops.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/fs/tmpfs/tmpfs_vnops.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -485,13 +485,13 @@ tmpfs_nocacheread(vm_object_t tobj, vm_p
                 vm_page_zero_invalid(m, TRUE);
                 vm_page_wakeup(m);
         }
-        vm_page_io_start(m);
+        vm_page_lock(m);
+        vm_page_hold(m);
+        vm_page_unlock(m);
         VM_OBJECT_WUNLOCK(tobj);
         error = uiomove_fromphys(&m, offset, tlen, uio);
-        VM_OBJECT_WLOCK(tobj);
-        vm_page_io_finish(m);
-        VM_OBJECT_WUNLOCK(tobj);
         vm_page_lock(m);
+        vm_page_unhold(m);
         if (m->queue == PQ_NONE) {
                 vm_page_deactivate(m);
         } else {
@@ -602,14 +602,16 @@ tmpfs_mappedwrite(vm_object_t tobj, size
                         vm_page_zero_invalid(tpg, TRUE);
                         vm_page_wakeup(tpg);
                 }
-                vm_page_io_start(tpg);
+                vm_page_lock(tpg);
+                vm_page_hold(tpg);
+                vm_page_unlock(tpg);
                 VM_OBJECT_WUNLOCK(tobj);
                 error = uiomove_fromphys(&tpg, offset, tlen, uio);
                 VM_OBJECT_WLOCK(tobj);
-                vm_page_io_finish(tpg);
                 if (error == 0)
                         vm_page_dirty(tpg);
                 vm_page_lock(tpg);
+                vm_page_unhold(tpg);
                 if (tpg->queue == PQ_NONE) {
                         vm_page_deactivate(tpg);
                 } else {

Modified: head/sys/kern/imgact_elf.c
==============================================================================
--- head/sys/kern/imgact_elf.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/kern/imgact_elf.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -378,7 +378,7 @@ __elfN(map_partial)(vm_map_t map, vm_obj
                 off = offset - trunc_page(offset);
                 error = copyout((caddr_t)sf_buf_kva(sf) + off,
                     (caddr_t)start, end - start);
-                vm_imgact_unmap_page(object, sf);
+                vm_imgact_unmap_page(sf);
                 if (error) {
                         return (KERN_FAILURE);
                 }
@@ -433,7 +433,7 @@ __elfN(map_insert)(vm_map_t map, vm_obje
                         sz = PAGE_SIZE - off;
                         error = copyout((caddr_t)sf_buf_kva(sf) + off,
                             (caddr_t)start, sz);
-                        vm_imgact_unmap_page(object, sf);
+                        vm_imgact_unmap_page(sf);
                         if (error) {
                                 return (KERN_FAILURE);
                         }
@@ -553,7 +553,7 @@ __elfN(load_section)(struct image_params
                     trunc_page(offset + filsz);
                 error = copyout((caddr_t)sf_buf_kva(sf) + off,
                     (caddr_t)map_addr, copy_len);
-                vm_imgact_unmap_page(object, sf);
+                vm_imgact_unmap_page(sf);
                 if (error) {
                         return (error);
                 }

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/kern/kern_exec.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -973,7 +973,7 @@ exec_map_first_page(imgp)
                 vm_page_wakeup(ma[0]);
         }
         vm_page_lock(ma[0]);
-        vm_page_wire(ma[0]);
+        vm_page_hold(ma[0]);
         vm_page_unlock(ma[0]);
         VM_OBJECT_WUNLOCK(object);
 
@@ -994,7 +994,7 @@ exec_unmap_first_page(imgp)
                 sf_buf_free(imgp->firstpage);
                 imgp->firstpage = NULL;
                 vm_page_lock(m);
-                vm_page_unwire(m, 0);
+                vm_page_unhold(m);
                 vm_page_unlock(m);
         }
 }

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/kern/sys_process.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -263,7 +263,6 @@ proc_rwmem(struct proc *p, struct uio *u
         writing = uio->uio_rw == UIO_WRITE;
         reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
         fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
-        fault_flags |= VM_FAULT_IOBUSY;
 
         /*
          * Only map in one page at a time.  We don't have to, but it
@@ -288,9 +287,9 @@ proc_rwmem(struct proc *p, struct uio *u
                 len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
                 /*
-                 * Fault and busy the page on behalf of the process.
+                 * Fault and hold the page on behalf of the process.
                  */
-                error = vm_fault_handle(map, pageno, reqprot, fault_flags, &m);
+                error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
                 if (error != KERN_SUCCESS) {
                         if (error == KERN_RESOURCE_SHORTAGE)
                                 error = ENOMEM;
@@ -316,9 +315,9 @@ proc_rwmem(struct proc *p, struct uio *u
 
                 /*
                  * Release the page.
                  */
-                VM_OBJECT_WLOCK(m->object);
-                vm_page_io_finish(m);
-                VM_OBJECT_WUNLOCK(m->object);
+                vm_page_lock(m);
+                vm_page_unhold(m);
+                vm_page_unlock(m);
 
         } while (error == 0 && uio->uio_resid > 0);

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/vm/vm_extern.h	Mon Aug  5 08:55:35 2013	(r253953)
@@ -63,7 +63,7 @@ void vm_fault_copy_entry(vm_map_t, vm_ma
     vm_ooffset_t *);
 int vm_fault_disable_pagefaults(void);
 void vm_fault_enable_pagefaults(int save);
-int vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
@@ -87,7 +87,7 @@ void vnode_pager_setsize(struct vnode *,
 int vslock(void *, size_t);
 void vsunlock(void *, size_t);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
-void vm_imgact_unmap_page(vm_object_t, struct sf_buf *sf);
+void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
 int vm_mlock(struct proc *, struct ucred *, const void *, size_t);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/vm/vm_fault.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -221,8 +221,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
         if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
                 ktrfault(vaddr, fault_type);
 #endif
-        result = vm_fault_handle(map, trunc_page(vaddr), fault_type,
-            fault_flags, NULL);
+        result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
+            NULL);
 #ifdef KTRACE
         if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
                 ktrfaultend(result);
@@ -231,7 +231,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
 }
 
 int
-vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
         vm_prot_t prot;
@@ -943,10 +943,7 @@ vnode_locked:
                 vm_page_activate(fs.m);
         if (m_hold != NULL) {
                 *m_hold = fs.m;
-                if (fault_flags & VM_FAULT_IOBUSY)
-                        vm_page_io_start(fs.m);
-                else
-                        vm_page_hold(fs.m);
+                vm_page_hold(fs.m);
         }
         vm_page_unlock(fs.m);
         vm_page_wakeup(fs.m);
@@ -1148,7 +1145,7 @@ vm_fault_quick_hold_pages(vm_map_t map,
          * and hold these pages.
          */
         for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
-                if (*mp == NULL && vm_fault_handle(map, va, prot,
+                if (*mp == NULL && vm_fault_hold(map, va, prot,
                     VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
                         goto error;
 }

Modified: head/sys/vm/vm_glue.c
==============================================================================
--- head/sys/vm/vm_glue.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/vm/vm_glue.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -223,7 +223,7 @@ vsunlock(void *addr, size_t len)
  * Return the pinned page if successful; otherwise, return NULL.
  */
 static vm_page_t
-vm_imgact_page_iostart(vm_object_t object, vm_ooffset_t offset)
+vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
 {
         vm_page_t m, ma[1];
         vm_pindex_t pindex;
@@ -249,7 +249,9 @@ vm_imgact_page_iostart(vm_object_t objec
                 }
                 vm_page_wakeup(m);
         }
-        vm_page_io_start(m);
+        vm_page_lock(m);
+        vm_page_hold(m);
+        vm_page_unlock(m);
 out:
         VM_OBJECT_WUNLOCK(object);
         return (m);
@@ -264,7 +266,7 @@ vm_imgact_map_page(vm_object_t object, v
 {
         vm_page_t m;
 
-        m = vm_imgact_page_iostart(object, offset);
+        m = vm_imgact_hold_page(object, offset);
         if (m == NULL)
                 return (NULL);
         sched_pin();
@@ -275,16 +277,16 @@ vm_imgact_map_page(vm_object_t object, v
  * Destroy the given CPU private mapping and unpin the page that it mapped.
  */
 void
-vm_imgact_unmap_page(vm_object_t object, struct sf_buf *sf)
+vm_imgact_unmap_page(struct sf_buf *sf)
 {
         vm_page_t m;
 
         m = sf_buf_page(sf);
         sf_buf_free(sf);
         sched_unpin();
-        VM_OBJECT_WLOCK(object);
-        vm_page_io_finish(m);
-        VM_OBJECT_WUNLOCK(object);
+        vm_page_lock(m);
+        vm_page_unhold(m);
+        vm_page_unlock(m);
 }
 
 void

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/vm/vm_map.h	Mon Aug  5 08:55:35 2013	(r253953)
@@ -329,7 +329,6 @@ long vmspace_resident_count(struct vmspa
 #define	VM_FAULT_NORMAL	0		/* Nothing special */
 #define	VM_FAULT_CHANGE_WIRING	1	/* Change the wiring as appropriate */
 #define	VM_FAULT_DIRTY	2		/* Dirty the page; use w/VM_PROT_COPY */
-#define	VM_FAULT_IOBUSY	4		/* Busy the faulted page */
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must
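The lock-order inversion behind this revert can be illustrated outside the
kernel.  The sketch below is an analogy only, not part of the commit: two
pthread mutexes stand in for the vnode lock and the page busy state (both
names are hypothetical), with one thread acquiring them in the order used by
vm_object_sync() and the other in the order introduced by the reverted
r253939.  With opposite acquisition orders, the two threads can deadlock.

/*
 * Analogy only: "vnode_lock" and "page_busy" are stand-ins, not kernel APIs.
 * Build with: cc -pthread deadlock.c
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t vnode_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page_busy = PTHREAD_MUTEX_INITIALIZER;

/* Mimics vm_object_sync(): vnode lock first, then sleep on busy. */
static void *
sync_path(void *arg)
{

        (void)arg;
        pthread_mutex_lock(&vnode_lock);
        pthread_mutex_lock(&page_busy);
        pthread_mutex_unlock(&page_busy);
        pthread_mutex_unlock(&vnode_lock);
        return (NULL);
}

/* Mimics the reverted path: busy the page first, then vget() the vnode. */
static void *
fault_path(void *arg)
{

        (void)arg;
        pthread_mutex_lock(&page_busy);
        pthread_mutex_lock(&vnode_lock);
        pthread_mutex_unlock(&vnode_lock);
        pthread_mutex_unlock(&page_busy);
        return (NULL);
}

int
main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, sync_path, NULL);
        pthread_create(&b, NULL, fault_path, NULL);
        pthread_join(a, NULL);  /* may never return: a holds vnode_lock... */
        pthread_join(b, NULL);  /* ...while b holds page_busy */
        printf("lucky interleaving; the orderings still conflict\n");
        return (0);
}

This is also why the revert switches the read, ptrace, and exec paths back to
hold/unhold pairs: a hold merely pins the page against reclamation and never
blocks a thread that already owns the vnode lock, so it creates no ordering
against that lock.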