Date: Sun, 4 Aug 2013 21:07:24 +0000 (UTC)
From: Attilio Rao <attilio@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r253939 - in head/sys: cddl/contrib/opensolaris/uts/common/fs/zfs fs/tmpfs kern vm
Message-ID: <201308042107.r74L7OvH096499@svn.freebsd.org>
Author: attilio
Date: Sun Aug 4 21:07:24 2013
New Revision: 253939
URL: http://svnweb.freebsd.org/changeset/base/253939

Log:
  The page hold mechanism is fast, but it has a couple of fallouts:
  - It does not let pages respect the LRU policy
  - It bloats the active/inactive queues with held pages

  Try to avoid it as much as possible, with the long-term target of
  removing it completely.  Use the soft-busy mechanism to protect page
  content accesses during short-term operations (like
  uiomove_fromphys()).

  After this change only vm_fault_quick_hold_pages() still uses the
  hold mechanism for page content access.  There is additional
  complexity there: the quick path cannot immediately access the page
  object in order to busy the page, and the slow path cannot busy more
  than one page at a time (to avoid deadlocks).  Fixing that primitive
  can lead to the complete removal of the page hold mechanism.

  Sponsored by:	EMC / Isilon storage division
  Discussed with:	alc
  Reviewed by:	jeff
  Tested by:	pho

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/fs/tmpfs/tmpfs_vnops.c
  head/sys/kern/imgact_elf.c
  head/sys/kern/kern_exec.c
  head/sys/kern/sys_process.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_glue.c
  head/sys/vm/vm_map.h

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sun Aug 4 21:07:24 2013	(r253939)
@@ -324,7 +324,8 @@ zfs_ioctl(vnode_t *vp, u_long com, intpt
 }
 
 static vm_page_t
-page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
+page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
+    boolean_t alloc)
 {
 	vm_object_t obj;
 	vm_page_t pp;
@@ -346,6 +347,8 @@ page_busy(vnode_t *vp, int64_t start, in
 				continue;
 			}
 		} else if (pp == NULL) {
+			if (!alloc)
+				break;
 			pp = vm_page_alloc(obj, OFF_TO_IDX(start),
 			    VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
 			    VM_ALLOC_NOBUSY);
@@ -356,8 +359,10 @@ page_busy(vnode_t *vp, int64_t start, in
 
 		if (pp != NULL) {
 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-			vm_object_pip_add(obj, 1);
 			vm_page_io_start(pp);
+			if (!alloc)
+				break;
+			vm_object_pip_add(obj, 1);
 			pmap_remove_write(pp);
 			vm_page_clear_dirty(pp, off, nbytes);
 		}
@@ -367,55 +372,12 @@ page_busy(vnode_t *vp, int64_t start, in
 }
 
 static void
-page_unbusy(vm_page_t pp)
+page_unbusy(vm_page_t pp, boolean_t unalloc)
 {
 
 	vm_page_io_finish(pp);
-	vm_object_pip_subtract(pp->object, 1);
-}
-
-static vm_page_t
-page_hold(vnode_t *vp, int64_t start)
-{
-	vm_object_t obj;
-	vm_page_t pp;
-
-	obj = vp->v_object;
-	zfs_vmobject_assert_wlocked(obj);
-
-	for (;;) {
-		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
-		    pp->valid) {
-			if ((pp->oflags & VPO_BUSY) != 0) {
-				/*
-				 * Reference the page before unlocking and
-				 * sleeping so that the page daemon is less
-				 * likely to reclaim it.
-				 */
-				vm_page_reference(pp);
-				vm_page_sleep(pp, "zfsmwb");
-				continue;
-			}
-
-			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-			vm_page_lock(pp);
-			vm_page_hold(pp);
-			vm_page_unlock(pp);
-
-		} else
-			pp = NULL;
-		break;
-	}
-	return (pp);
-}
-
-static void
-page_unhold(vm_page_t pp)
-{
-
-	vm_page_lock(pp);
-	vm_page_unhold(pp);
-	vm_page_unlock(pp);
+	if (unalloc)
+		vm_object_pip_subtract(pp->object, 1);
 }
 
 static caddr_t
@@ -479,7 +441,8 @@ update_pages(vnode_t *vp, int64_t start,
 
 			zfs_vmobject_wlock(obj);
 			vm_page_undirty(pp);
-		} else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
+		} else if ((pp = page_busy(vp, start, off, nbytes,
+		    TRUE)) != NULL) {
 			zfs_vmobject_wunlock(obj);
 
 			va = zfs_map_page(pp, &sf);
@@ -488,7 +451,7 @@ update_pages(vnode_t *vp, int64_t start,
 			zfs_unmap_page(sf);
 
 			zfs_vmobject_wlock(obj);
-			page_unbusy(pp);
+			page_unbusy(pp, TRUE);
 		}
 		len -= nbytes;
 		off = 0;
@@ -598,7 +561,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
 		vm_page_t pp;
 		uint64_t bytes = MIN(PAGESIZE - off, len);
 
-		if (pp = page_hold(vp, start)) {
+		if (pp = page_busy(vp, start, 0, 0, FALSE)) {
 			struct sf_buf *sf;
 			caddr_t va;
 
@@ -607,7 +570,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
 			error = uiomove(va + off, bytes, UIO_READ, uio);
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock(obj);
-			page_unhold(pp);
+			page_unbusy(pp, FALSE);
 		} else {
 			zfs_vmobject_wunlock(obj);
 			error = dmu_read_uio(os, zp->z_id, uio, bytes);

Modified: head/sys/fs/tmpfs/tmpfs_vnops.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_vnops.c	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/fs/tmpfs/tmpfs_vnops.c	Sun Aug 4 21:07:24 2013	(r253939)
@@ -485,13 +485,13 @@ tmpfs_nocacheread(vm_object_t tobj, vm_p
 			vm_page_zero_invalid(m, TRUE);
 		vm_page_wakeup(m);
 	}
-	vm_page_lock(m);
-	vm_page_hold(m);
-	vm_page_unlock(m);
+	vm_page_io_start(m);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&m, offset, tlen, uio);
+	VM_OBJECT_WLOCK(tobj);
+	vm_page_io_finish(m);
+	VM_OBJECT_WUNLOCK(tobj);
 	vm_page_lock(m);
-	vm_page_unhold(m);
 	if (m->queue == PQ_NONE) {
 		vm_page_deactivate(m);
 	} else {
@@ -602,16 +602,14 @@ tmpfs_mappedwrite(vm_object_t tobj, size
 			vm_page_zero_invalid(tpg, TRUE);
 		vm_page_wakeup(tpg);
 	}
-	vm_page_lock(tpg);
-	vm_page_hold(tpg);
-	vm_page_unlock(tpg);
+	vm_page_io_start(tpg);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&tpg, offset, tlen, uio);
 	VM_OBJECT_WLOCK(tobj);
+	vm_page_io_finish(tpg);
 	if (error == 0)
 		vm_page_dirty(tpg);
 	vm_page_lock(tpg);
-	vm_page_unhold(tpg);
 	if (tpg->queue == PQ_NONE) {
 		vm_page_deactivate(tpg);
 	} else {

Modified: head/sys/kern/imgact_elf.c
==============================================================================
--- head/sys/kern/imgact_elf.c	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/kern/imgact_elf.c	Sun Aug 4 21:07:24 2013	(r253939)
@@ -378,7 +378,7 @@ __elfN(map_partial)(vm_map_t map, vm_obj
 		off = offset - trunc_page(offset);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
 		    end - start);
-		vm_imgact_unmap_page(sf);
+		vm_imgact_unmap_page(object, sf);
 		if (error) {
 			return (KERN_FAILURE);
 		}
@@ -433,7 +433,7 @@ __elfN(map_insert)(vm_map_t map, vm_obje
 			sz = PAGE_SIZE - off;
 		error = copyout((caddr_t)sf_buf_kva(sf) + off,
 		    (caddr_t)start, sz);
-		vm_imgact_unmap_page(sf);
+		vm_imgact_unmap_page(object, sf);
 		if (error) {
 			return (KERN_FAILURE);
 		}
@@ -553,7 +553,7 @@ __elfN(load_section)(struct image_params
 		    trunc_page(offset + filsz);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off,
 		    (caddr_t)map_addr, copy_len);
-		vm_imgact_unmap_page(sf);
+		vm_imgact_unmap_page(object, sf);
 		if (error) {
 			return (error);
 		}

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/kern/kern_exec.c	Sun Aug 4 21:07:24 2013	(r253939)
@@ -973,7 +973,7 @@ exec_map_first_page(imgp)
 		vm_page_wakeup(ma[0]);
 	}
 	vm_page_lock(ma[0]);
-	vm_page_hold(ma[0]);
+	vm_page_wire(ma[0]);
 	vm_page_unlock(ma[0]);
 	VM_OBJECT_WUNLOCK(object);
 
@@ -994,7 +994,7 @@ exec_unmap_first_page(imgp)
 		sf_buf_free(imgp->firstpage);
 		imgp->firstpage = NULL;
 		vm_page_lock(m);
-		vm_page_unhold(m);
+		vm_page_unwire(m, 0);
 		vm_page_unlock(m);
 	}
 }

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/kern/sys_process.c	Sun Aug 4 21:07:24 2013	(r253939)
@@ -263,6 +263,7 @@ proc_rwmem(struct proc *p, struct uio *u
 	writing = uio->uio_rw == UIO_WRITE;
 	reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
 	fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
+	fault_flags |= VM_FAULT_IOBUSY;
 
 	/*
 	 * Only map in one page at a time.  We don't have to, but it
@@ -287,9 +288,9 @@ proc_rwmem(struct proc *p, struct uio *u
 		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
 		/*
-		 * Fault and hold the page on behalf of the process.
+		 * Fault and busy the page on behalf of the process.
 		 */
-		error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
+		error = vm_fault_handle(map, pageno, reqprot, fault_flags, &m);
 		if (error != KERN_SUCCESS) {
 			if (error == KERN_RESOURCE_SHORTAGE)
 				error = ENOMEM;
@@ -315,9 +316,9 @@ proc_rwmem(struct proc *p, struct uio *u
 
 		/*
 		 * Release the page.
 		 */
-		vm_page_lock(m);
-		vm_page_unhold(m);
-		vm_page_unlock(m);
+		VM_OBJECT_WLOCK(m->object);
+		vm_page_io_finish(m);
+		VM_OBJECT_WUNLOCK(m->object);
 
 	} while (error == 0 && uio->uio_resid > 0);

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/vm/vm_extern.h	Sun Aug 4 21:07:24 2013	(r253939)
@@ -63,7 +63,7 @@ void vm_fault_copy_entry(vm_map_t, vm_ma
     vm_ooffset_t *);
 int vm_fault_disable_pagefaults(void);
 void vm_fault_enable_pagefaults(int save);
-int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+int vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
@@ -87,7 +87,7 @@ void vnode_pager_setsize(struct vnode *,
 int vslock(void *, size_t);
 void vsunlock(void *, size_t);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
-void vm_imgact_unmap_page(struct sf_buf *sf);
+void vm_imgact_unmap_page(vm_object_t, struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
 int vm_mlock(struct proc *, struct ucred *, const void *, size_t);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/vm/vm_fault.c	Sun Aug 4 21:07:24 2013	(r253939)
@@ -221,8 +221,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
 		ktrfault(vaddr, fault_type);
 #endif
-	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
-	    NULL);
+	result = vm_fault_handle(map, trunc_page(vaddr), fault_type,
+	    fault_flags, NULL);
 #ifdef KTRACE
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
 		ktrfaultend(result);
@@ -231,7 +231,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
 }
 
 int
-vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	vm_prot_t prot;
@@ -943,7 +943,10 @@ vnode_locked:
 		vm_page_activate(fs.m);
 	if (m_hold != NULL) {
 		*m_hold = fs.m;
-		vm_page_hold(fs.m);
+		if (fault_flags & VM_FAULT_IOBUSY)
+			vm_page_io_start(fs.m);
+		else
+			vm_page_hold(fs.m);
 	}
 	vm_page_unlock(fs.m);
 	vm_page_wakeup(fs.m);
@@ -1145,7 +1148,7 @@ vm_fault_quick_hold_pages(vm_map_t map,
 		 * and hold these pages.
 		 */
 		for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
-			if (*mp == NULL && vm_fault_hold(map, va, prot,
+			if (*mp == NULL && vm_fault_handle(map, va, prot,
 			    VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
 				goto error;
 	}

Modified: head/sys/vm/vm_glue.c
==============================================================================
--- head/sys/vm/vm_glue.c	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/vm/vm_glue.c	Sun Aug 4 21:07:24 2013	(r253939)
@@ -223,7 +223,7 @@ vsunlock(void *addr, size_t len)
  * Return the pinned page if successful; otherwise, return NULL.
  */
 static vm_page_t
-vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
+vm_imgact_page_iostart(vm_object_t object, vm_ooffset_t offset)
 {
 	vm_page_t m, ma[1];
 	vm_pindex_t pindex;
@@ -249,9 +249,7 @@ vm_imgact_hold_page(vm_object_t object,
 		}
 		vm_page_wakeup(m);
 	}
-	vm_page_lock(m);
-	vm_page_hold(m);
-	vm_page_unlock(m);
+	vm_page_io_start(m);
 out:
 	VM_OBJECT_WUNLOCK(object);
 	return (m);
@@ -266,7 +264,7 @@ vm_imgact_map_page(vm_object_t object, v
 {
 	vm_page_t m;
 
-	m = vm_imgact_hold_page(object, offset);
+	m = vm_imgact_page_iostart(object, offset);
 	if (m == NULL)
 		return (NULL);
 	sched_pin();
@@ -277,16 +275,16 @@ vm_imgact_map_page(vm_object_t object, v
  * Destroy the given CPU private mapping and unpin the page that it mapped.
  */
 void
-vm_imgact_unmap_page(struct sf_buf *sf)
+vm_imgact_unmap_page(vm_object_t object, struct sf_buf *sf)
 {
 	vm_page_t m;
 
 	m = sf_buf_page(sf);
 	sf_buf_free(sf);
 	sched_unpin();
-	vm_page_lock(m);
-	vm_page_unhold(m);
-	vm_page_unlock(m);
+	VM_OBJECT_WLOCK(object);
+	vm_page_io_finish(m);
+	VM_OBJECT_WUNLOCK(object);
 }
 
 void

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h	Sun Aug 4 21:00:22 2013	(r253938)
+++ head/sys/vm/vm_map.h	Sun Aug 4 21:07:24 2013	(r253939)
@@ -329,6 +329,7 @@ long vmspace_resident_count(struct vmspa
 #define	VM_FAULT_NORMAL	0		/* Nothing special */
 #define	VM_FAULT_CHANGE_WIRING	1	/* Change the wiring as appropriate */
 #define	VM_FAULT_DIRTY	2		/* Dirty the page; use w/VM_PROT_COPY */
+#define	VM_FAULT_IOBUSY	4		/* Busy the faulted page */
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must
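
A note on the new pattern: most of the converted sites above follow the
same shape, soft-busy the page under its object lock, drop the lock
across the copy, then retake the lock to clear the busy state.  The
function below is a minimal illustrative sketch of that shape, modeled
on the tmpfs_nocacheread() hunk; it is not code from the commit, the
helper name softbusy_uiomove() is hypothetical, and "m" is assumed to be
a valid, resident page belonging to "obj".

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

/*
 * Sketch only: protect a short-term page content access
 * (uiomove_fromphys()) with soft-busy instead of vm_page_hold().
 */
static int
softbusy_uiomove(vm_object_t obj, vm_page_t m, vm_offset_t offset,
    int len, struct uio *uio)
{
	int error;

	VM_OBJECT_WLOCK(obj);
	/*
	 * Soft-busy the page; vm_page_io_start() requires the object
	 * write lock and, unlike a hold, does not touch the page lock
	 * or leave an unreclaimable page on the paging queues.
	 */
	vm_page_io_start(m);
	/*
	 * The object lock may be dropped across the copy: the soft-busy
	 * state keeps the page from being freed or exclusively busied
	 * in the meantime.
	 */
	VM_OBJECT_WUNLOCK(obj);
	error = uiomove_fromphys(&m, offset, len, uio);
	VM_OBJECT_WLOCK(obj);
	/* Clear the soft-busy state and wake any waiters. */
	vm_page_io_finish(m);
	VM_OBJECT_WUNLOCK(obj);
	return (error);
}

When the page is produced by the fault path instead, the new
VM_FAULT_IOBUSY flag makes vm_fault_handle() return the page already
soft-busied (see the proc_rwmem() hunks), leaving the caller only the
vm_page_io_finish() half of the pattern.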