From owner-svn-src-head@FreeBSD.ORG Sat Dec 25 21:26:56 2010
Message-Id: <201012252126.oBPLQuRb092752@svn.freebsd.org>
From: Alan Cox
Date: Sat, 25 Dec 2010 21:26:56 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r216699 - in head/sys: dev/cxgb/ulp/tom dev/drm kern net vm
X-SVN-Group: head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
List-Id: SVN commit messages for the src tree for head/-current

Author: alc
Date: Sat Dec 25 21:26:56 2010
New Revision: 216699
URL: http://svn.freebsd.org/changeset/base/216699

Log:
  Introduce and use a new VM interface for temporarily pinning pages.  This
  new interface replaces the combined use of vm_fault_quick() and
  pmap_extract_and_hold() throughout the kernel.

  In collaboration with:	kib@

Deleted:
  head/sys/dev/cxgb/ulp/tom/cxgb_vm.c
  head/sys/dev/cxgb/ulp/tom/cxgb_vm.h

Modified:
  head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
  head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
  head/sys/dev/drm/via_dmablit.c
  head/sys/kern/sys_pipe.c
  head/sys/kern/uipc_cow.c
  head/sys/kern/vfs_bio.c
  head/sys/net/bpf_zerocopy.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
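Before the diff itself, a minimal caller-side sketch of the new interface may
help.  It is hypothetical and not part of the commit: the helper name
example_pin_user_buffer(), the EXAMPLE_NPAGES bound, and the EFAULT handling
are illustrative assumptions, while the vm_fault_quick_hold_pages() signature,
its count-or-(-1) return convention, and the hold/unhold pattern are taken
from the changes below.

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/errno.h>
    #include <sys/proc.h>

    #include <vm/vm.h>
    #include <vm/vm_extern.h>
    #include <vm/vm_map.h>
    #include <vm/vm_page.h>

    /* Arbitrary bound for this sketch; the range must fit in this many pages. */
    #define EXAMPLE_NPAGES  4

    /*
     * Hypothetical helper: temporarily pin the user pages backing
     * [uaddr, uaddr + len) for reading, then drop the holds again.
     */
    static int
    example_pin_user_buffer(vm_offset_t uaddr, vm_size_t len)
    {
            vm_page_t ma[EXAMPLE_NPAGES];
            int count, i;

            /*
             * Fault in (if necessary) and hold every page in the range.
             * A negative return means the range was unmapped or did not
             * allow the requested access.
             */
            count = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
                uaddr, len, VM_PROT_READ, ma, EXAMPLE_NPAGES);
            if (count < 0)
                    return (EFAULT);

            /*
             * ... use ma[0 .. count - 1] here, e.g. map the pages into the
             * kernel with pmap_qenter() as vmapbuf() does below ...
             */

            /* Release the holds once the pages are no longer needed. */
            for (i = 0; i < count; i++) {
                    vm_page_lock(ma[i]);
                    vm_page_unhold(ma[i]);
                    vm_page_unlock(ma[i]);
            }
            return (0);
    }

The converted callers below follow the same shape; via_dmablit.c and
bpf_zerocopy.c additionally turn the transient hold into a wire before
unholding, so the pages stay resident for the lifetime of the DMA mapping.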
Modified: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c	Sat Dec 25 21:26:56 2010	(r216699)
@@ -90,7 +90,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 
 static int (*pru_sosend)(struct socket *so, struct sockaddr *addr,
@@ -218,8 +217,9 @@ cxgb_hold_iovec_pages(struct uio *uio, v
 
 		count = min(count, npages);
 
-		err = vm_fault_hold_user_pages(map,
-		    (vm_offset_t)iov->iov_base, mp, count, prot);
+		/* The following return value is not used. XXX */
+		err = vm_fault_quick_hold_pages(map,
+		    (vm_offset_t)iov->iov_base, iov->iov_len, prot, mp, count);
 		mp += count;
 		totcount += count;
 		curbytes = iov->iov_len;
@@ -503,7 +503,7 @@ cxgb_sosend(struct socket *so, struct so
 	 * - the number of bytes to be transferred exceeds the threshold
 	 * - the number of bytes currently in flight won't exceed the in-flight
 	 *   threshold XXX TODO
-	 * - vm_fault_hold_user_pages succeeds
+	 * - vm_fault_quick_hold_pages succeeds
 	 * - blocking socket XXX for now
 	 *
 	 */
@@ -970,7 +970,7 @@ cxgb_soreceive(struct socket *so, struct
 	 * - the number of bytes to be transferred exceeds the threshold
 	 * - the number of bytes currently in flight won't exceed the in-flight
 	 *   threshold XXX TODO
-	 * - vm_fault_hold_user_pages succeeds
+	 * - vm_fault_quick_hold_pages succeeds
 	 * - blocking socket XXX for now
 	 * - iovcnt is 1
 	 *

Modified: head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c
==============================================================================
--- head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/dev/cxgb/ulp/tom/cxgb_ddp.c	Sat Dec 25 21:26:56 2010	(r216699)
@@ -90,7 +90,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 
 #define MAX_SCHEDULE_TIMEOUT	300
@@ -130,14 +129,6 @@ t3_pin_pages(bus_dma_tag_t tag, bus_dmam
 	struct ddp_gather_list *p;
 	vm_map_t map;
 
-	/*
-	 * XXX need x86 agnostic check
-	 */
-	if (addr + len > VM_MAXUSER_ADDRESS)
-		return (EFAULT);
-
-
-
 	pg_off = addr & PAGE_MASK;
 	npages = (pg_off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	p = malloc(sizeof(struct ddp_gather_list) + npages * sizeof(vm_page_t *),
@@ -146,10 +137,11 @@ t3_pin_pages(bus_dma_tag_t tag, bus_dmam
 		return (ENOMEM);
 
 	map = &curthread->td_proc->p_vmspace->vm_map;
-	err = vm_fault_hold_user_pages(map, addr, p->dgl_pages, npages,
-	    VM_PROT_READ | VM_PROT_WRITE);
-	if (err)
+	if (vm_fault_quick_hold_pages(map, addr, len, VM_PROT_READ |
+	    VM_PROT_WRITE, p->dgl_pages, npages) < 0) {
+		err = EFAULT;
 		goto free_gl;
+	}
 
 	if (gl && gl->dgl_offset == pg_off && gl->dgl_nelem >= npages &&
 	    gl->dgl_length >= len) {

Modified: head/sys/dev/drm/via_dmablit.c
==============================================================================
--- head/sys/dev/drm/via_dmablit.c	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/dev/drm/via_dmablit.c	Sat Dec 25 21:26:56 2010	(r216699)
@@ -177,11 +177,10 @@ via_free_sg_info(drm_via_sg_info_t *vsg)
 		free(vsg->desc_pages, DRM_MEM_DRIVER);
 	case dr_via_pages_locked:
 		for (i=0; i < vsg->num_pages; ++i) {
-			if ( NULL != (page = vsg->pages[i])) {
-				vm_page_lock(page);
-				vm_page_unwire(page, 0);
-				vm_page_unlock(page);
-			}
+			page = vsg->pages[i];
+			vm_page_lock(page);
+			vm_page_unwire(page, 0);
+			vm_page_unlock(page);
 		}
 	case dr_via_pages_alloc:
 		free(vsg->pages, DRM_MEM_DRIVER);
@@ -224,41 +223,31 @@ via_lock_all_dma_pages(drm_via_sg_info_t
 {
 	unsigned long first_pfn = VIA_PFN(xfer->mem_addr);
 	vm_page_t m;
-	vm_map_t map;
 	int i;
 
-	map = &curproc->p_vmspace->vm_map;
-
 	vsg->num_pages = VIA_PFN(xfer->mem_addr +
 	    (xfer->num_lines * xfer->mem_stride -1)) - first_pfn + 1;
 
-	/* Make sure that the user has access to these pages */
-	for(i = 0; i < vsg->num_pages; i++) {
-		if (vm_fault_quick((caddr_t)xfer->mem_addr + IDX_TO_OFF(i),
-		    VM_PROT_RW) < 0)
-			return (-EACCES);
-	}
-
 	if (NULL == (vsg->pages = malloc(sizeof(vm_page_t) * vsg->num_pages,
-	    DRM_MEM_DRIVER, M_NOWAIT | M_ZERO)))
+	    DRM_MEM_DRIVER, M_NOWAIT)))
 		return -ENOMEM;
 
-	for(i = 0; i < vsg->num_pages; i++) {
-		m = pmap_extract_and_hold(map->pmap,
-		    (vm_offset_t)xfer->mem_addr + IDX_TO_OFF(i), VM_PROT_RW);
-		if (m == NULL)
-			break;
+	vsg->state = dr_via_pages_alloc;
+
+	if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+	    (vm_offset_t)xfer->mem_addr, vsg->num_pages * PAGE_SIZE,
+	    VM_PROT_READ | VM_PROT_WRITE, vsg->pages, vsg->num_pages) < 0)
+		return -EACCES;
+
+	for (i = 0; i < vsg->num_pages; i++) {
+		m = vsg->pages[i];
 		vm_page_lock(m);
 		vm_page_wire(m);
 		vm_page_unhold(m);
 		vm_page_unlock(m);
-		vsg->pages[i] = m;
 	}
 	vsg->state = dr_via_pages_locked;
 
-	if (i != vsg->num_pages)
-		return -EINVAL;
-
 	DRM_DEBUG("DMA pages locked\n");
 
 	return 0;
Modified: head/sys/kern/sys_pipe.c
==============================================================================
--- head/sys/kern/sys_pipe.c	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/kern/sys_pipe.c	Sat Dec 25 21:26:56 2010	(r216699)
@@ -747,10 +747,8 @@ pipe_build_write_buffer(wpipe, uio)
 	struct pipe *wpipe;
 	struct uio *uio;
 {
-	pmap_t pmap;
 	u_int size;
 	int i;
-	vm_offset_t addr, endaddr;
 
 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
 	KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
@@ -760,25 +758,10 @@ pipe_build_write_buffer(wpipe, uio)
 	if (size > wpipe->pipe_buffer.size)
 		size = wpipe->pipe_buffer.size;
 
-	pmap = vmspace_pmap(curproc->p_vmspace);
-	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
-	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
-	if (endaddr < addr)
+	if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+	    (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
+	    wpipe->pipe_map.ms, PIPENPAGES)) < 0)
 		return (EFAULT);
-	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
-		/*
-		 * vm_fault_quick() can sleep.
-		 */
-	race:
-		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
-			vm_page_unhold_pages(wpipe->pipe_map.ms, i);
-			return (EFAULT);
-		}
-		wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
-		    VM_PROT_READ);
-		if (wpipe->pipe_map.ms[i] == NULL)
-			goto race;
-	}
 
 	/*
 	 * set up the control block

Modified: head/sys/kern/uipc_cow.c
==============================================================================
--- head/sys/kern/uipc_cow.c	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/kern/uipc_cow.c	Sat Dec 25 21:26:56 2010	(r216699)
@@ -103,24 +103,20 @@ socow_setup(struct mbuf *m0, struct uio
 	struct vmspace *vmspace;
 	struct vm_map *map;
 	vm_offset_t offset, uva;
+	vm_size_t len;
 
 	socow_stats.attempted++;
 	vmspace = curproc->p_vmspace;
 	map = &vmspace->vm_map;
 	uva = (vm_offset_t) uio->uio_iov->iov_base;
 	offset = uva & PAGE_MASK;
+	len = PAGE_SIZE - offset;
 
 	/*
 	 * Verify that access to the given address is allowed from user-space.
 	 */
-	if (vm_fault_quick((caddr_t)uva, VM_PROT_READ) < 0)
-		return (0);
-
-	/*
-	 * verify page is mapped & not already wired for i/o
-	 */
-	pp = pmap_extract_and_hold(map->pmap, uva, VM_PROT_READ);
-	if (pp == NULL) {
+	if (vm_fault_quick_hold_pages(map, uva, len, VM_PROT_READ, &pp, 1) <
+	    0) {
 		socow_stats.fail_not_mapped++;
 		return(0);
 	}
@@ -165,7 +161,7 @@ socow_setup(struct mbuf *m0, struct uio
 	 */
 	MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, socow_iodone,
 	    (void*)sf_buf_kva(sf), sf, M_RDONLY, EXT_SFBUF);
-	m0->m_len = PAGE_SIZE - offset;
+	m0->m_len = len;
 	m0->m_data = (caddr_t)sf_buf_kva(sf) + offset;
 	socow_stats.success++;
Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/kern/vfs_bio.c	Sat Dec 25 21:26:56 2010	(r216699)
@@ -3855,46 +3855,19 @@ vm_hold_free_pages(struct buf *bp, int n
 int
 vmapbuf(struct buf *bp)
 {
-	caddr_t addr, kva;
+	caddr_t kva;
 	vm_prot_t prot;
-	int pidx, i;
-	struct vm_page *m;
-	struct pmap *pmap = &curproc->p_vmspace->vm_pmap;
+	int pidx;
 
 	if (bp->b_bufsize < 0)
 		return (-1);
 	prot = VM_PROT_READ;
 	if (bp->b_iocmd == BIO_READ)
 		prot |= VM_PROT_WRITE;	/* Less backwards than it looks */
-	for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data), pidx = 0;
-	     addr < bp->b_data + bp->b_bufsize;
-	     addr += PAGE_SIZE, pidx++) {
-		/*
-		 * Do the vm_fault if needed; do the copy-on-write thing
-		 * when reading stuff off device into memory.
-		 *
-		 * NOTE! Must use pmap_extract() because addr may be in
-		 * the userland address space, and kextract is only guarenteed
-		 * to work for the kernland address space (see: sparc64 port).
-		 */
-retry:
-		if (vm_fault_quick(addr >= bp->b_data ? addr : bp->b_data,
-		    prot) < 0) {
-			for (i = 0; i < pidx; ++i) {
-				vm_page_lock(bp->b_pages[i]);
-				vm_page_unhold(bp->b_pages[i]);
-				vm_page_unlock(bp->b_pages[i]);
-				bp->b_pages[i] = NULL;
-			}
-			return(-1);
-		}
-		m = pmap_extract_and_hold(pmap, (vm_offset_t)addr, prot);
-		if (m == NULL)
-			goto retry;
-		bp->b_pages[pidx] = m;
-	}
-	if (pidx > btoc(MAXPHYS))
-		panic("vmapbuf: mapped more than MAXPHYS");
+	if ((pidx = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
+	    (vm_offset_t)bp->b_data, bp->b_bufsize, prot, bp->b_pages,
+	    btoc(MAXPHYS))) < 0)
+		return (-1);
 	pmap_qenter((vm_offset_t)bp->b_saveaddr, bp->b_pages, pidx);
 	kva = bp->b_saveaddr;

Modified: head/sys/net/bpf_zerocopy.c
==============================================================================
--- head/sys/net/bpf_zerocopy.c	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/net/bpf_zerocopy.c	Sat Dec 25 21:26:56 2010	(r216699)
@@ -161,12 +161,8 @@ zbuf_sfbuf_get(struct vm_map *map, vm_of
 	struct sf_buf *sf;
 	vm_page_t pp;
 
-	if (vm_fault_quick((caddr_t) uaddr, VM_PROT_READ | VM_PROT_WRITE) <
-	    0)
-		return (NULL);
-	pp = pmap_extract_and_hold(map->pmap, uaddr, VM_PROT_READ |
-	    VM_PROT_WRITE);
-	if (pp == NULL)
+	if (vm_fault_quick_hold_pages(map, uaddr, PAGE_SIZE, VM_PROT_READ |
+	    VM_PROT_WRITE, &pp, 1) < 0)
 		return (NULL);
 	vm_page_lock(pp);
 	vm_page_wire(pp);

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/vm/vm_extern.h	Sat Dec 25 21:26:56 2010	(r216699)
@@ -63,6 +63,8 @@ void vm_fault_copy_entry(vm_map_t, vm_ma
     vm_ooffset_t *);
 int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
+int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+    vm_prot_t prot, vm_page_t *ma, int max_count);
 void vm_fault_unwire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_fault_wire(vm_map_t, vm_offset_t, vm_offset_t, boolean_t);
 int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int);
Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Sat Dec 25 17:35:30 2010	(r216698)
+++ head/sys/vm/vm_fault.c	Sat Dec 25 21:26:56 2010	(r216699)
@@ -1045,6 +1045,81 @@ vm_fault_prefault(pmap_t pmap, vm_offset
 }
 
 /*
+ * Hold each of the physical pages that are mapped by the specified range of
+ * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid
+ * and allow the specified types of access, "prot".  If all of the implied
+ * pages are successfully held, then the number of held pages is returned
+ * together with pointers to those pages in the array "ma".  However, if any
+ * of the pages cannot be held, -1 is returned.
+ */
+int
+vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
+    vm_prot_t prot, vm_page_t *ma, int max_count)
+{
+	vm_offset_t end, va;
+	vm_page_t *mp;
+	int count;
+	boolean_t pmap_failed;
+
+	end = round_page(addr + len);
+	addr = trunc_page(addr);
+
+	/*
+	 * Check for illegal addresses.
+	 */
+	if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map))
+		return (-1);
+
+	count = howmany(end - addr, PAGE_SIZE);
+	if (count > max_count)
+		panic("vm_fault_quick_hold_pages: count > max_count");
+
+	/*
+	 * Most likely, the physical pages are resident in the pmap, so it is
+	 * faster to try pmap_extract_and_hold() first.
+	 */
+	pmap_failed = FALSE;
+	for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) {
+		*mp = pmap_extract_and_hold(map->pmap, va, prot);
+		if (*mp == NULL)
+			pmap_failed = TRUE;
+		else if ((prot & VM_PROT_WRITE) != 0 &&
+		    (*mp)->dirty != VM_PAGE_BITS_ALL) {
+			/*
+			 * Explicitly dirty the physical page.  Otherwise, the
+			 * caller's changes may go unnoticed because they are
+			 * performed through an unmanaged mapping or by a DMA
+			 * operation.
+			 */
+			vm_page_lock_queues();
+			vm_page_dirty(*mp);
+			vm_page_unlock_queues();
+		}
+	}
+	if (pmap_failed) {
+		/*
+		 * One or more pages could not be held by the pmap.  Either no
+		 * page was mapped at the specified virtual address or that
+		 * mapping had insufficient permissions.  Attempt to fault in
+		 * and hold these pages.
+		 */
+		for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
+			if (*mp == NULL && vm_fault_hold(map, va, prot,
+			    VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
+				goto error;
+	}
+	return (count);
+error:
+	for (mp = ma; mp < ma + count; mp++)
+		if (*mp != NULL) {
+			vm_page_lock(*mp);
+			vm_page_unhold(*mp);
+			vm_page_unlock(*mp);
+		}
+	return (-1);
+}
+
+/*
  * vm_fault_quick:
  *
  *	Ensure that the requested virtual address, which may be in userland,