From: Roger Pau Monné <royger@FreeBSD.org>
Date: Fri, 24 Oct 2014 09:48:59 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r273582 - in head/sys/amd64: amd64 include

Author: royger
Date: Fri Oct 24 09:48:58 2014
New Revision: 273582
URL: https://svnweb.freebsd.org/changeset/base/273582

Log:
  amd64: make uiomove_fromphys functional for pages not mapped by the DMAP

  Place the code introduced in r268660 into a separate function that can be
  called from uiomove_fromphys.  Instead of pre-allocating two KVA pages, use
  vmem_alloc to allocate them on demand when needed.  This prevents blocking
  if a page fault is taken while physical addresses from outside the DMAP are
  used, since the lock is now removed.

  Also introduce a safety catch in PHYS_TO_DMAP and DMAP_TO_PHYS.

  Sponsored by: Citrix Systems R&D
  Reviewed by: kib
  Differential Revision: https://reviews.freebsd.org/D947

  amd64/amd64/pmap.c:
   - Factor out the code that deals with non-DMAP addresses from
     pmap_copy_pages and place it in pmap_map_io_transient.
   - Change the code to use vmem_alloc instead of a set of pre-allocated
     pages.
   - Use pmap_qenter and don't pin the thread if page faults can be taken.

  amd64/amd64/uio_machdep.c:
   - Use pmap_map_io_transient in order to correctly deal with physical
     addresses not covered by the DMAP.

  amd64/include/pmap.h:
   - Add the prototypes for the new functions.

  amd64/include/vmparam.h:
   - Add safety catches to make sure PHYS_TO_DMAP and DMAP_TO_PHYS are only
     used with addresses covered by the DMAP.
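For reference, here is a minimal, hypothetical sketch (not part of the commit)
of how a caller is expected to use the new pair of functions; it follows the
same pattern as the pmap_copy_pages() and uiomove_fromphys() changes in the
diff below.  The helper name copy_page_to_buf() and its arguments are made up
for illustration.

/*
 * Hypothetical example: copy the contents of a page that may lie outside
 * the DMAP into a kernel buffer of at least PAGE_SIZE bytes.  With
 * can_fault == FALSE the thread is pinned and a CPU-local transient PTE
 * is used for non-DMAP pages; the unmap call is only needed when the map
 * call returned TRUE.
 */
static void
copy_page_to_buf(vm_page_t m, void *buf)
{
        vm_offset_t vaddr;
        boolean_t mapped;

        mapped = pmap_map_io_transient(&m, &vaddr, 1, FALSE);
        bcopy((void *)vaddr, buf, PAGE_SIZE);
        if (__predict_false(mapped))
                pmap_unmap_io_transient(&m, &vaddr, 1, FALSE);
}

Passing TRUE for can_fault instead makes pmap_map_io_transient use a global
pmap_qenter mapping and skip the pin, which is what uiomove_fromphys needs
since copies to and from user space can fault.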
Modified:
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/amd64/uio_machdep.c
  head/sys/amd64/include/pmap.h
  head/sys/amd64/include/vmparam.h

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Fri Oct 24 07:50:34 2014	(r273581)
+++ head/sys/amd64/amd64/pmap.c	Fri Oct 24 09:48:58 2014	(r273582)
@@ -115,6 +115,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include <sys/vmem.h>
 #include
 #include
 #include
@@ -402,11 +403,6 @@ SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_sav
     CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
     "Count of saved TLB context on switch");
 
-/* pmap_copy_pages() over non-DMAP */
-static struct mtx cpage_lock;
-static vm_offset_t cpage_a;
-static vm_offset_t cpage_b;
-
 /*
  * Crashdump maps.
  */
@@ -1072,10 +1068,6 @@ pmap_init(void)
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < pv_npg; i++)
 		TAILQ_INIT(&pv_table[i].pv_list);
-
-	mtx_init(&cpage_lock, "cpage", NULL, MTX_DEF);
-	cpage_a = kva_alloc(PAGE_SIZE);
-	cpage_b = kva_alloc(PAGE_SIZE);
 }
 
 static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
@@ -5056,66 +5048,24 @@ pmap_copy_pages(vm_page_t ma[], vm_offse
     vm_offset_t b_offset, int xfersize)
 {
 	void *a_cp, *b_cp;
-	vm_page_t m_a, m_b;
-	vm_paddr_t p_a, p_b;
-	pt_entry_t *pte;
-	vm_offset_t a_pg_offset, b_pg_offset;
+	vm_page_t pages[2];
+	vm_offset_t vaddr[2], a_pg_offset, b_pg_offset;
 	int cnt;
-	boolean_t pinned;
+	boolean_t mapped;
 
-	/*
-	 * NB: The sequence of updating a page table followed by accesses
-	 * to the corresponding pages used in the !DMAP case is subject to
-	 * the situation described in the "AMD64 Architecture Programmer's
-	 * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
-	 * Coherency Considerations".  Therefore, issuing the INVLPG right
-	 * after modifying the PTE bits is crucial.
-	 */
-	pinned = FALSE;
 	while (xfersize > 0) {
 		a_pg_offset = a_offset & PAGE_MASK;
-		m_a = ma[a_offset >> PAGE_SHIFT];
-		p_a = m_a->phys_addr;
+		pages[0] = ma[a_offset >> PAGE_SHIFT];
 		b_pg_offset = b_offset & PAGE_MASK;
-		m_b = mb[b_offset >> PAGE_SHIFT];
-		p_b = m_b->phys_addr;
+		pages[1] = mb[b_offset >> PAGE_SHIFT];
 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
-		if (__predict_false(p_a < DMAP_MIN_ADDRESS ||
-		    p_a > DMAP_MIN_ADDRESS + dmaplimit)) {
-			mtx_lock(&cpage_lock);
-			sched_pin();
-			pinned = TRUE;
-			pte = vtopte(cpage_a);
-			*pte = p_a | X86_PG_A | X86_PG_V |
-			    pmap_cache_bits(kernel_pmap, m_a->md.pat_mode, 0);
-			invlpg(cpage_a);
-			a_cp = (char *)cpage_a + a_pg_offset;
-		} else {
-			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
-		}
-		if (__predict_false(p_b < DMAP_MIN_ADDRESS ||
-		    p_b > DMAP_MIN_ADDRESS + dmaplimit)) {
-			if (!pinned) {
-				mtx_lock(&cpage_lock);
-				sched_pin();
-				pinned = TRUE;
-			}
-			pte = vtopte(cpage_b);
-			*pte = p_b | X86_PG_A | X86_PG_M | X86_PG_RW |
-			    X86_PG_V | pmap_cache_bits(kernel_pmap,
-			    m_b->md.pat_mode, 0);
-			invlpg(cpage_b);
-			b_cp = (char *)cpage_b + b_pg_offset;
-		} else {
-			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
-		}
+		mapped = pmap_map_io_transient(pages, vaddr, 2, FALSE);
+		a_cp = (char *)vaddr[0] + a_pg_offset;
+		b_cp = (char *)vaddr[1] + b_pg_offset;
 		bcopy(a_cp, b_cp, cnt);
-		if (__predict_false(pinned)) {
-			sched_unpin();
-			mtx_unlock(&cpage_lock);
-			pinned = FALSE;
-		}
+		if (__predict_false(mapped))
+			pmap_unmap_io_transient(pages, vaddr, 2, FALSE);
 		a_offset += cnt;
 		b_offset += cnt;
 		xfersize -= cnt;
@@ -6901,6 +6851,107 @@ done:
 	*num = idx;
 }
 
+/**
+ * Get the kernel virtual address of a set of physical pages. If there are
+ * physical addresses not covered by the DMAP perform a transient mapping
+ * that will be removed when calling pmap_unmap_io_transient.
+ *
+ * \param page        The pages the caller wishes to obtain the virtual
+ *                    address on the kernel memory map.
+ * \param vaddr       On return contains the kernel virtual memory address
+ *                    of the pages passed in the page parameter.
+ * \param count       Number of pages passed in.
+ * \param can_fault   TRUE if the thread using the mapped pages can take
+ *                    page faults, FALSE otherwise.
+ *
+ * \returns TRUE if the caller must call pmap_unmap_io_transient when
+ *          finished or FALSE otherwise.
+ *
+ */
+boolean_t
+pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+    boolean_t can_fault)
+{
+	vm_paddr_t paddr;
+	boolean_t needs_mapping;
+	pt_entry_t *pte;
+	int cache_bits, error, i;
+
+	/*
+	 * Allocate any KVA space that we need, this is done in a separate
+	 * loop to prevent calling vmem_alloc while pinned.
+	 */
+	needs_mapping = FALSE;
+	for (i = 0; i < count; i++) {
+		paddr = VM_PAGE_TO_PHYS(page[i]);
+		if (__predict_false(paddr >= dmaplimit)) {
+			error = vmem_alloc(kernel_arena, PAGE_SIZE,
+			    M_BESTFIT | M_WAITOK, &vaddr[i]);
+			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
+			needs_mapping = TRUE;
+		} else {
+			vaddr[i] = PHYS_TO_DMAP(paddr);
+		}
+	}
+
+	/* Exit early if everything is covered by the DMAP */
+	if (!needs_mapping)
+		return (FALSE);
+
+	/*
+	 * NB: The sequence of updating a page table followed by accesses
+	 * to the corresponding pages used in the !DMAP case is subject to
+	 * the situation described in the "AMD64 Architecture Programmer's
+	 * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
+	 * Coherency Considerations".  Therefore, issuing the INVLPG right
+	 * after modifying the PTE bits is crucial.
+	 */
+	if (!can_fault)
+		sched_pin();
+	for (i = 0; i < count; i++) {
+		paddr = VM_PAGE_TO_PHYS(page[i]);
+		if (paddr >= dmaplimit) {
+			if (can_fault) {
+				/*
+				 * Slow path, since we can get page faults
+				 * while mappings are active don't pin the
+				 * thread to the CPU and instead add a global
+				 * mapping visible to all CPUs.
+				 */
+				pmap_qenter(vaddr[i], &page[i], 1);
+			} else {
+				pte = vtopte(vaddr[i]);
+				cache_bits = pmap_cache_bits(kernel_pmap,
+				    page[i]->md.pat_mode, 0);
+				pte_store(pte, paddr | X86_PG_RW | X86_PG_V |
+				    cache_bits);
+				invlpg(vaddr[i]);
+			}
+		}
+	}
+
+	return (needs_mapping);
+}
+
+void
+pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
+    boolean_t can_fault)
+{
+	vm_paddr_t paddr;
+	int i;
+
+	if (!can_fault)
+		sched_unpin();
+	for (i = 0; i < count; i++) {
+		paddr = VM_PAGE_TO_PHYS(page[i]);
+		if (paddr >= dmaplimit) {
+			if (can_fault)
+				pmap_qremove(vaddr[i], 1);
+			vmem_free(kernel_arena, vaddr[i], PAGE_SIZE);
+		}
+	}
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>

Modified: head/sys/amd64/amd64/uio_machdep.c
==============================================================================
--- head/sys/amd64/amd64/uio_machdep.c	Fri Oct 24 07:50:34 2014	(r273581)
+++ head/sys/amd64/amd64/uio_machdep.c	Fri Oct 24 09:48:58 2014	(r273582)
@@ -61,10 +61,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offs
 	struct thread *td = curthread;
 	struct iovec *iov;
 	void *cp;
-	vm_offset_t page_offset;
+	vm_offset_t page_offset, vaddr;
 	size_t cnt;
 	int error = 0;
 	int save = 0;
+	boolean_t mapped;
 
 	KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
 	    ("uiomove_fromphys: mode"));
@@ -72,6 +73,7 @@ uiomove_fromphys(vm_page_t ma[], vm_offs
 	    ("uiomove_fromphys proc"));
 	save = td->td_pflags & TDP_DEADLKTREAT;
 	td->td_pflags |= TDP_DEADLKTREAT;
+	mapped = FALSE;
 	while (n > 0 && uio->uio_resid) {
 		iov = uio->uio_iov;
 		cnt = iov->iov_len;
@@ -84,8 +86,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offs
 			cnt = n;
 		page_offset = offset & PAGE_MASK;
 		cnt = min(cnt, PAGE_SIZE - page_offset);
-		cp = (char *)PHYS_TO_DMAP(ma[offset >> PAGE_SHIFT]->phys_addr) +
-		    page_offset;
+		if (uio->uio_segflg != UIO_NOCOPY) {
+			mapped = pmap_map_io_transient(
+			    &ma[offset >> PAGE_SHIFT], &vaddr, 1, TRUE);
+			cp = (char *)vaddr + page_offset;
+		}
 		switch (uio->uio_segflg) {
 		case UIO_USERSPACE:
 			maybe_yield();
@@ -105,6 +110,11 @@ uiomove_fromphys(vm_page_t ma[], vm_offs
 		case UIO_NOCOPY:
 			break;
 		}
+		if (__predict_false(mapped)) {
+			pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT],
+			    &vaddr, 1, TRUE);
+			mapped = FALSE;
+		}
 		iov->iov_base = (char *)iov->iov_base + cnt;
 		iov->iov_len -= cnt;
 		uio->uio_resid -= cnt;
@@ -113,6 +123,9 @@ uiomove_fromphys(vm_page_t ma[], vm_offs
 		n -= cnt;
 	}
 out:
+	if (__predict_false(mapped))
+		pmap_unmap_io_transient(&ma[offset >> PAGE_SHIFT], &vaddr, 1,
+		    TRUE);
 	if (save == 0)
 		td->td_pflags &= ~TDP_DEADLKTREAT;
 	return (error);

Modified: head/sys/amd64/include/pmap.h
==============================================================================
--- head/sys/amd64/include/pmap.h	Fri Oct 24 07:50:34 2014	(r273581)
+++ head/sys/amd64/include/pmap.h	Fri Oct 24 09:48:58 2014	(r273582)
@@ -397,6 +397,8 @@ void	pmap_invalidate_cache_pages(vm_page
 void	pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
 	    boolean_t force);
 void	pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num);
+boolean_t pmap_map_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
+void	pmap_unmap_io_transient(vm_page_t *, vm_offset_t *, int, boolean_t);
 #endif /* _KERNEL */
 
 #endif /* !LOCORE */

Modified: head/sys/amd64/include/vmparam.h
==============================================================================
--- head/sys/amd64/include/vmparam.h	Fri Oct 24 07:50:34 2014	(r273581)
+++ head/sys/amd64/include/vmparam.h	Fri Oct 24 09:48:58 2014	(r273582)
@@ -175,8 +175,18 @@
 #define	VM_MAX_ADDRESS		UPT_MAX_ADDRESS
 #define	VM_MIN_ADDRESS		(0)
 
-#define	PHYS_TO_DMAP(x)	((x) | DMAP_MIN_ADDRESS)
-#define	DMAP_TO_PHYS(x)	((x) & ~DMAP_MIN_ADDRESS)
+#define	PHYS_TO_DMAP(x)	({						\
+	KASSERT((x) < dmaplimit,					\
+	    ("physical address %#jx not covered by the DMAP",		\
+	    (uintmax_t)x));						\
+	(x) | DMAP_MIN_ADDRESS; })
+
+#define	DMAP_TO_PHYS(x)	({						\
+	KASSERT((x) < (DMAP_MIN_ADDRESS + dmaplimit) &&			\
+	    (x) >= DMAP_MIN_ADDRESS,					\
+	    ("virtual address %#jx not covered by the DMAP",		\
+	    (uintmax_t)x));						\
+	(x) & ~DMAP_MIN_ADDRESS; })
 
 /*
  * How many physical pages per kmem arena virtual page.
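
A note on the vmparam.h change: with the new KASSERTs, handing PHYS_TO_DMAP a
physical address at or above dmaplimit (or DMAP_TO_PHYS a virtual address
outside the DMAP window) now panics on INVARIANTS kernels instead of silently
producing a bogus address.  A hypothetical sketch of the resulting contract
follows; the helper dmap_roundtrip() is made up for illustration.

/*
 * Hypothetical sketch: round-tripping an address through the DMAP is
 * still fine as long as it is covered by the DMAP; callers that may see
 * addresses above dmaplimit have to check first, as
 * pmap_map_io_transient() does above.
 */
static vm_paddr_t
dmap_roundtrip(vm_paddr_t paddr)
{

        KASSERT(paddr < dmaplimit, ("address not covered by the DMAP"));
        return (DMAP_TO_PHYS(PHYS_TO_DMAP(paddr)));     /* == paddr */
}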