Date: Mon, 20 Dec 2010 22:49:31 +0000 (UTC)
From: Alan Cox <alc@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r216604 - in head/sys: dev/cxgb/ulp/tom kern vm
Message-ID: <201012202249.oBKMnV12095378@svn.freebsd.org>
Author: alc
Date: Mon Dec 20 22:49:31 2010
New Revision: 216604
URL: http://svn.freebsd.org/changeset/base/216604

Log:
  Introduce vm_fault_hold() and use it to (1) eliminate a long-standing race
  condition in proc_rwmem() and to (2) simplify the implementation of the
  cxgb driver's vm_fault_hold_user_pages().  Specifically, in proc_rwmem()
  the requested read or write could fail because the targeted page could be
  reclaimed between the calls to vm_fault() and vm_page_hold().

  In collaboration with:	kib@
  MFC after:	6 weeks

Modified:
  head/sys/dev/cxgb/ulp/tom/cxgb_vm.c
  head/sys/kern/sys_process.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_map.h

Modified: head/sys/dev/cxgb/ulp/tom/cxgb_vm.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_vm.c	Mon Dec 20 21:12:18 2010	(r216603)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_vm.c	Mon Dec 20 22:49:31 2010	(r216604)
@@ -66,7 +66,7 @@ vm_fault_hold_user_pages(vm_map_t map, v
     int count, vm_prot_t prot)
 {
 	vm_offset_t end, va;
-	int faults, rv;
+	int faults;
 	pmap_t pmap;
 	vm_page_t m, *pages;
 
@@ -122,20 +122,11 @@ vm_fault_hold_user_pages(vm_map_t map, v
 	/*
 	 * Pages either have insufficient permissions or are not present
 	 * trigger a fault where neccessary
-	 *
 	 */
-	rv = 0;
 	for (pages = mp, va = addr; va < end; va += PAGE_SIZE, pages++) {
-		/*
-		 * Account for a very narrow race where the page may be
-		 * taken away from us before it is held
-		 */
-		while (*pages == NULL) {
-			rv = vm_fault(map, va, prot, VM_FAULT_NORMAL);
-			if (rv)
-				goto error;
-			*pages = pmap_extract_and_hold(pmap, va, prot);
-		}
+		if (*pages == NULL && vm_fault_hold(map, va, prot,
+		    VM_FAULT_NORMAL, pages) != KERN_SUCCESS)
+			goto error;
 	}
 	return (0);
 error:

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c	Mon Dec 20 21:12:18 2010	(r216603)
+++ head/sys/kern/sys_process.c	Mon Dec 20 22:49:31 2010	(r216604)
@@ -237,10 +237,9 @@ int
 proc_rwmem(struct proc *p, struct uio *uio)
 {
 	vm_map_t map;
-	vm_object_t backing_object, object;
 	vm_offset_t pageno;		/* page number */
 	vm_prot_t reqprot;
-	int error, writing;
+	int error, fault_flags, page_offset, writing;
 
 	/*
 	 * Assert that someone has locked this vmspace.  (Should be
@@ -255,26 +254,24 @@ proc_rwmem(struct proc *p, struct uio *u
 	 */
 	map = &p->p_vmspace->vm_map;
 
+	/*
+	 * If we are writing, then we request vm_fault() to create a private
+	 * copy of each page.  Since these copies will not be writeable by the
+	 * process, we must explicity request that they be dirtied.
+	 */
 	writing = uio->uio_rw == UIO_WRITE;
 	reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
+	fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
 
 	/*
 	 * Only map in one page at a time.  We don't have to, but it
 	 * makes things easier.  This way is trivial - right?
 	 */
 	do {
-		vm_map_t tmap;
 		vm_offset_t uva;
-		int page_offset;		/* offset into page */
-		vm_map_entry_t out_entry;
-		vm_prot_t out_prot;
-		boolean_t wired;
-		vm_pindex_t pindex;
 		u_int len;
 		vm_page_t m;
 
-		object = NULL;
-
 		uva = (vm_offset_t)uio->uio_offset;
 
 		/*
@@ -289,10 +286,10 @@ proc_rwmem(struct proc *p, struct uio *u
 		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
 		/*
-		 * Fault the page on behalf of the process
+		 * Fault and hold the page on behalf of the process.
 		 */
-		error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
-		if (error) {
+		error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
+		if (error != KERN_SUCCESS) {
 			if (error == KERN_RESOURCE_SHORTAGE)
 				error = ENOMEM;
 			else
@@ -301,61 +298,18 @@ proc_rwmem(struct proc *p, struct uio *u
 		}
 
 		/*
-		 * Now we need to get the page.  out_entry and wired
-		 * aren't used.  One would think the vm code
-		 * would be a *bit* nicer...  We use tmap because
-		 * vm_map_lookup() can change the map argument.
-		 */
-		tmap = map;
-		error = vm_map_lookup(&tmap, pageno, reqprot, &out_entry,
-		    &object, &pindex, &out_prot, &wired);
-		if (error) {
-			error = EFAULT;
-			break;
-		}
-		VM_OBJECT_LOCK(object);
-		while ((m = vm_page_lookup(object, pindex)) == NULL &&
-		    !writing &&
-		    (backing_object = object->backing_object) != NULL) {
-			/*
-			 * Allow fallback to backing objects if we are reading.
-			 */
-			VM_OBJECT_LOCK(backing_object);
-			pindex += OFF_TO_IDX(object->backing_object_offset);
-			VM_OBJECT_UNLOCK(object);
-			object = backing_object;
-		}
-		if (writing && m != NULL) {
-			vm_page_dirty(m);
-			vm_pager_page_unswapped(m);
-		}
-		VM_OBJECT_UNLOCK(object);
-		if (m == NULL) {
-			vm_map_lookup_done(tmap, out_entry);
-			error = EFAULT;
-			break;
-		}
-
-		/*
-		 * Hold the page in memory.
-		 */
-		vm_page_lock(m);
-		vm_page_hold(m);
-		vm_page_unlock(m);
-
-		/*
-		 * We're done with tmap now.
-		 */
-		vm_map_lookup_done(tmap, out_entry);
-
-		/*
 		 * Now do the i/o move.
 		 */
 		error = uiomove_fromphys(&m, page_offset, len, uio);
 
 		/* Make the I-cache coherent for breakpoints. */
-		if (!error && writing && (out_prot & VM_PROT_EXECUTE))
-			vm_sync_icache(map, uva, len);
+		if (writing && error == 0) {
+			vm_map_lock_read(map);
+			if (vm_map_check_protection(map, pageno, pageno +
+			    PAGE_SIZE, VM_PROT_EXECUTE))
+				vm_sync_icache(map, uva, len);
+			vm_map_unlock_read(map);
+		}
 
 		/*
 		 * Release the page.

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h	Mon Dec 20 21:12:18 2010	(r216603)
+++ head/sys/vm/vm_extern.h	Mon Dec 20 22:49:31 2010	(r216604)
@@ -61,6 +61,8 @@ int useracc(void *, int, int);
 int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
 void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
     vm_ooffset_t *);
+int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, vm_page_t *m_hold);
 void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Mon Dec 20 21:12:18 2010	(r216603)
+++ head/sys/vm/vm_fault.c	Mon Dec 20 22:49:31 2010	(r216604)
@@ -201,13 +201,20 @@ unlock_and_deallocate(struct faultstate
  *	KERN_SUCCESS is returned if the page fault is handled; otherwise,
  *	a standard error specifying why the fault is fatal is returned.
  *
- *
  *	The map in question must be referenced, and remains so.
  *	Caller may hold no locks.
  */
 int
 vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
-	 int fault_flags)
+    int fault_flags)
+{
+
+	return (vm_fault_hold(map, vaddr, fault_type, fault_flags, NULL));
+}
+
+int
+vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, vm_page_t *m_hold)
 {
 	vm_prot_t prot;
 	int is_first_object_locked, result;
@@ -880,7 +887,8 @@ vnode_locked:
 	if (hardfault)
 		fs.entry->lastr = fs.pindex + faultcount - behind;
 
-	if (prot & VM_PROT_WRITE) {
+	if ((prot & VM_PROT_WRITE) != 0 ||
+	    (fault_flags & VM_FAULT_DIRTY) != 0) {
 		vm_object_set_writeable_dirty(fs.object);
 
 		/*
@@ -906,8 +914,9 @@ vnode_locked:
 		 * Also tell the backing pager, if any, that it should remove
 		 * any swap backing since the page is now dirty.
 		 */
-		if ((fault_type & VM_PROT_WRITE) != 0 &&
-		    (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) {
+		if (((fault_type & VM_PROT_WRITE) != 0 &&
+		    (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
+		    (fault_flags & VM_FAULT_DIRTY) != 0) {
 			vm_page_dirty(fs.m);
 			vm_pager_page_unswapped(fs.m);
 		}
@@ -949,6 +958,10 @@ vnode_locked:
 		vm_page_unwire(fs.m, 1);
 	} else
 		vm_page_activate(fs.m);
+	if (m_hold != NULL) {
+		*m_hold = fs.m;
+		vm_page_hold(fs.m);
+	}
 	vm_page_unlock(fs.m);
 	vm_page_wakeup(fs.m);

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h	Mon Dec 20 21:12:18 2010	(r216603)
+++ head/sys/vm/vm_map.h	Mon Dec 20 22:49:31 2010	(r216604)
@@ -325,6 +325,7 @@ long vmspace_wired_count(struct vmspace
  */
 #define	VM_FAULT_NORMAL	0		/* Nothing special */
 #define	VM_FAULT_CHANGE_WIRING	1	/* Change the wiring as appropriate */
+#define	VM_FAULT_DIRTY	2		/* Dirty the page; use w/VM_PROT_COPY */
 
 /*
  * The following "find_space" options are supported by vm_map_find()
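The race vm_fault_hold() closes exists because faulting and holding used to
be two separate steps: vm_fault() installs the page, but the page daemon can
reclaim it before the caller manages to hold it.  A minimal sketch of a
caller under the new interface (hold_user_page() is a hypothetical helper,
not code from the tree):

	/*
	 * Old pattern, for contrast; the page could vanish between the
	 * two calls, so callers had to loop:
	 *
	 *	while ((m = pmap_extract_and_hold(pmap, va, prot)) == NULL)
	 *		if (vm_fault(map, va, prot, VM_FAULT_NORMAL))
	 *			return (EFAULT);
	 */
	static int
	hold_user_page(vm_map_t map, vm_offset_t va, vm_prot_t prot,
	    vm_page_t *mp)
	{

		/*
		 * vm_fault_hold() acquires the hold while it still has the
		 * page locked, so a single call suffices and no window
		 * exists in which the page can be reclaimed.
		 */
		if (vm_fault_hold(map, va, prot, VM_FAULT_NORMAL, mp) !=
		    KERN_SUCCESS)
			return (EFAULT);
		return (0);
	}

The caller later drops the hold with vm_page_lock(m), vm_page_unhold(m),
vm_page_unlock(m), as proc_rwmem() does after uiomove_fromphys().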
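The new VM_FAULT_DIRTY flag is needed because proc_rwmem() writes to the
page through the kernel (uiomove_fromphys()), never through a writeable
user mapping, so no pmap ever records a modification; unless the fault
itself dirties the private copy, the pageout daemon could reclaim the page
without preserving the write.  A sketch of the write-side setup, mirroring
the new proc_rwmem() logic (fault_page_for_write() is a hypothetical
helper):

	/*
	 * Request a private, readable copy of the page (VM_PROT_COPY) and
	 * ask the fault to dirty it up front (VM_FAULT_DIRTY), then hold
	 * it for the kernel-side write.
	 */
	static int
	fault_page_for_write(vm_map_t map, vm_offset_t uva, vm_page_t *mp)
	{
		int error;

		error = vm_fault_hold(map, trunc_page(uva),
		    VM_PROT_COPY | VM_PROT_READ, VM_FAULT_DIRTY, mp);
		if (error != KERN_SUCCESS)
			return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
			    EFAULT);
		return (0);
	}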
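Note also why the I-cache synchronization test in proc_rwmem() changed: the
old code took the effective protection (out_prot) from vm_map_lookup(), but
with that lookup gone the execute permission has to be re-derived, which the
new code does with vm_map_check_protection() under a read-locked map before
calling vm_sync_icache().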