From: Andrew Turner
Date: Fri, 13 May 2016 16:03:50 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r299683 - in head/sys/arm64: arm64 include

Author: andrew
Date: Fri May 13 16:03:50 2016
New Revision: 299683
URL: https://svnweb.freebsd.org/changeset/base/299683

Log:
  Add support to the arm64 busdma to handle the cache. For now this is
  disabled; when enabled, it will default to assuming memory is not
  cache-coherent unless either the tag was created cache-coherent or its
  parent was marked as cache-coherent.

  Obtained from:  ABT Systems Ltd
  Relnotes:       yes
  Sponsored by:   The FreeBSD Foundation

Modified:
  head/sys/arm64/arm64/busdma_bounce.c
  head/sys/arm64/include/cpufunc.h

Modified: head/sys/arm64/arm64/busdma_bounce.c
==============================================================================
--- head/sys/arm64/arm64/busdma_bounce.c	Fri May 13 15:57:55 2016	(r299682)
+++ head/sys/arm64/arm64/busdma_bounce.c	Fri May 13 16:03:50 2016	(r299683)
@@ -1,8 +1,11 @@
 /*-
  * Copyright (c) 1997, 1998 Justin T. Gibbs.
- * Copyright (c) 2015 The FreeBSD Foundation
+ * Copyright (c) 2015-2016 The FreeBSD Foundation
  * All rights reserved.
  *
+ * Portions of this software were developed by Andrew Turner
+ * under sponsorship of the FreeBSD Foundation.
+ *
  * Portions of this software were developed by Semihalf
  * under sponsorship of the FreeBSD Foundation.
  *
@@ -62,6 +65,7 @@ enum {
         BF_COULD_BOUNCE         = 0x01,
         BF_MIN_ALLOC_COMP       = 0x02,
         BF_KMEM_ALLOC           = 0x04,
+        BF_COHERENT             = 0x10,
 };

 struct bounce_zone;
@@ -113,6 +117,13 @@ static SYSCTL_NODE(_hw, OID_AUTO, busdma
 SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
            "Total bounce pages");

+struct sync_list {
+        vm_offset_t     vaddr;          /* kva of client data */
+        bus_addr_t      paddr;          /* physical address */
+        vm_page_t       pages;          /* starting page of client data */
+        bus_size_t      datacount;      /* client data count */
+};
+
 struct bus_dmamap {
         struct bp_list         bpages;
         int                    pagesneeded;
@@ -125,6 +136,8 @@ struct bus_dmamap {
         u_int                  flags;
 #define DMAMAP_COULD_BOUNCE     (1 << 0)
 #define DMAMAP_FROM_DMAMEM      (1 << 1)
+        int                    sync_count;
+        struct sync_list       slist[];
 };

 static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
@@ -171,9 +184,19 @@ bounce_bus_dma_tag_create(bus_dma_tag_t
         newtag->map_count = 0;
         newtag->segments = NULL;

-        if (parent != NULL && ((newtag->common.filter != NULL) ||
-            ((parent->bounce_flags & BF_COULD_BOUNCE) != 0)))
-                newtag->bounce_flags |= BF_COULD_BOUNCE;
+#ifdef notyet
+        if ((flags & BUS_DMA_COHERENT) != 0)
+                newtag->bounce_flags |= BF_COHERENT;
+#endif
+
+        if (parent != NULL) {
+                if ((newtag->common.filter != NULL ||
+                    (parent->bounce_flags & BF_COULD_BOUNCE) != 0))
+                        newtag->bounce_flags |= BF_COULD_BOUNCE;
+
+                /* Copy some flags from the parent */
+                newtag->bounce_flags |= parent->bounce_flags & BF_COHERENT;
+        }

         if (newtag->common.lowaddr < ptoa((vm_paddr_t)Maxmem) ||
             newtag->common.alignment > 1)
@@ -251,11 +274,14 @@ out:
 }

 static bus_dmamap_t
-alloc_dmamap(int flags)
+alloc_dmamap(bus_dma_tag_t dmat, int flags)
 {
+        u_long mapsize;
         bus_dmamap_t map;

-        map = malloc(sizeof(*map), M_DEVBUF, flags | M_ZERO);
+        mapsize = sizeof(*map);
+        mapsize += sizeof(struct sync_list) * dmat->common.nsegments;
+        map = malloc(mapsize, M_DEVBUF, flags | M_ZERO);
         if (map == NULL)
                 return (NULL);

@@ -288,7 +314,7 @@ bounce_bus_dmamap_create(bus_dma_tag_t d
                 }
         }

-        *mapp = alloc_dmamap(M_NOWAIT);
+        *mapp = alloc_dmamap(dmat, M_NOWAIT);
         if (*mapp == NULL) {
                 CTR3(KTR_BUSDMA, "%s: tag %p error %d",
                     __func__, dmat, ENOMEM);
@@ -360,7 +386,7 @@ bounce_bus_dmamap_destroy(bus_dma_tag_t
         if ((map->flags & DMAMAP_FROM_DMAMEM) != 0)
                 panic("bounce_bus_dmamap_destroy: Invalid map freed\n");

-        if (STAILQ_FIRST(&map->bpages) != NULL) {
+        if (STAILQ_FIRST(&map->bpages) != NULL || map->sync_count != 0) {
                 CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY);
                 return (EBUSY);
         }
@@ -421,7 +447,7 @@ bounce_bus_dmamem_alloc(bus_dma_tag_t dm
          * Create the map, but don't set the could bounce flag as
          * this allocation should never bounce;
          */
-        *mapp = alloc_dmamap(mflags);
+        *mapp = alloc_dmamap(dmat, mflags);
         if (*mapp == NULL) {
                 CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
                     __func__, dmat, dmat->common.flags, ENOMEM);
@@ -644,8 +670,9 @@ bounce_bus_dmamap_load_phys(bus_dma_tag_
     vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
+        struct sync_list *sl;
         bus_size_t sgsize;
-        bus_addr_t curaddr;
+        bus_addr_t curaddr, sl_end;
         int error;

         if (segs == NULL)
@@ -660,6 +687,9 @@ bounce_bus_dmamap_load_phys(bus_dma_tag_
                 }
         }

+        sl = map->slist + map->sync_count - 1;
+        sl_end = 0;
+
         while (buflen > 0) {
                 curaddr = buf;
                 sgsize = MIN(buflen, dmat->common.maxsegsz);
@@ -669,6 +699,23 @@ bounce_bus_dmamap_load_phys(bus_dma_tag_
                         sgsize = MIN(sgsize, PAGE_SIZE - (curaddr & PAGE_MASK));
                         curaddr = add_bounce_page(dmat, map, 0, curaddr,
                             sgsize);
+                } else if ((dmat->bounce_flags & BF_COHERENT) == 0) {
+                        if (map->sync_count > 0)
+                                sl_end = sl->paddr + sl->datacount;
+
+                        if (map->sync_count == 0 || curaddr != sl_end) {
+                                if (++map->sync_count > dmat->common.nsegments)
+                                        break;
+                                sl++;
+                                sl->vaddr = 0;
+                                sl->paddr = curaddr;
+                                sl->datacount = sgsize;
+                                sl->pages = PHYS_TO_VM_PAGE(curaddr);
+                                KASSERT(sl->pages != NULL,
+                                    ("%s: page at PA:0x%08lx is not in "
+                                    "vm_page_array", __func__, curaddr));
+                        } else
+                                sl->datacount += sgsize;
                 }
                 sgsize = _bus_dmamap_addseg(dmat, map, curaddr, sgsize,
                     segs, segp);
@@ -693,9 +740,10 @@ bounce_bus_dmamap_load_buffer(bus_dma_ta
     bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
     int *segp)
 {
+        struct sync_list *sl;
         bus_size_t sgsize, max_sgsize;
-        bus_addr_t curaddr;
-        vm_offset_t kvaddr, vaddr;
+        bus_addr_t curaddr, sl_pend;
+        vm_offset_t kvaddr, vaddr, sl_vend;
         int error;

         if (segs == NULL)
@@ -710,7 +758,11 @@ bounce_bus_dmamap_load_buffer(bus_dma_ta
                 }
         }

+        sl = map->slist + map->sync_count - 1;
         vaddr = (vm_offset_t)buf;
+        sl_pend = 0;
+        sl_vend = 0;
+
         while (buflen > 0) {
                 /*
                  * Get the physical address for this segment.
@@ -735,6 +787,34 @@ bounce_bus_dmamap_load_buffer(bus_dma_ta
                         sgsize = MIN(sgsize, max_sgsize);
                         curaddr = add_bounce_page(dmat, map, kvaddr, curaddr,
                             sgsize);
+                } else if ((dmat->bounce_flags & BF_COHERENT) == 0) {
+                        sgsize = MIN(sgsize, max_sgsize);
+                        if (map->sync_count > 0) {
+                                sl_pend = sl->paddr + sl->datacount;
+                                sl_vend = sl->vaddr + sl->datacount;
+                        }
+
+                        if (map->sync_count == 0 ||
+                            (kvaddr != 0 && kvaddr != sl_vend) ||
+                            (curaddr != sl_pend)) {
+
+                                if (++map->sync_count > dmat->common.nsegments)
+                                        goto cleanup;
+                                sl++;
+                                sl->vaddr = kvaddr;
+                                sl->paddr = curaddr;
+                                if (kvaddr != 0) {
+                                        sl->pages = NULL;
+                                } else {
+                                        sl->pages = PHYS_TO_VM_PAGE(curaddr);
+                                        KASSERT(sl->pages != NULL,
+                                            ("%s: page at PA:0x%08lx is not "
+                                            "in vm_page_array", __func__,
+                                            curaddr));
+                                }
+                                sl->datacount = sgsize;
+                        } else
+                                sl->datacount += sgsize;
                 } else {
                         sgsize = MIN(sgsize, max_sgsize);
                 }
@@ -746,6 +826,7 @@ bounce_bus_dmamap_load_buffer(bus_dma_ta
                 buflen -= sgsize;
         }

+cleanup:
         /*
          * Did we fit?
          */
@@ -783,13 +864,87 @@ bounce_bus_dmamap_unload(bus_dma_tag_t d
 {
         struct bounce_page *bpage;

-        if ((map->flags & DMAMAP_COULD_BOUNCE) == 0)
-                return;
-
         while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
                 STAILQ_REMOVE_HEAD(&map->bpages, links);
                 free_bounce_page(dmat, bpage);
         }
+
+        map->sync_count = 0;
+}
+
+static void
+dma_preread_safe(vm_offset_t va, vm_size_t size)
+{
+        /*
+         * Write back any partial cachelines immediately before and
+         * after the DMA region.
+         */
+        if (va & (dcache_line_size - 1))
+                cpu_dcache_wb_range(va, 1);
+        if ((va + size) & (dcache_line_size - 1))
+                cpu_dcache_wb_range(va + size, 1);
+
+        cpu_dcache_inv_range(va, size);
+}
+
+static void
+dma_dcache_sync(struct sync_list *sl, bus_dmasync_op_t op)
+{
+        uint32_t len, offset;
+        vm_page_t m;
+        vm_paddr_t pa;
+        vm_offset_t va, tempva;
+        bus_size_t size;
+
+        offset = sl->paddr & PAGE_MASK;
+        m = sl->pages;
+        size = sl->datacount;
+        pa = sl->paddr;
+
+        for ( ; size != 0; size -= len, pa += len, offset = 0, ++m) {
+                tempva = 0;
+                if (sl->vaddr == 0) {
+                        len = min(PAGE_SIZE - offset, size);
+                        tempva = pmap_quick_enter_page(m);
+                        va = tempva | offset;
+                        KASSERT(pa == (VM_PAGE_TO_PHYS(m) | offset),
+                            ("unexpected vm_page_t phys: 0x%16lx != 0x%16lx",
+                            VM_PAGE_TO_PHYS(m) | offset, pa));
+                } else {
+                        len = sl->datacount;
+                        va = sl->vaddr;
+                }
+
+                switch (op) {
+                case BUS_DMASYNC_PREWRITE:
+                case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
+                        cpu_dcache_wb_range(va, len);
+                        break;
+                case BUS_DMASYNC_PREREAD:
+                        /*
+                         * An mbuf may start in the middle of a cacheline. There
+                         * will be no cpu writes to the beginning of that line
+                         * (which contains the mbuf header) while dma is in
+                         * progress. Handle that case by doing a writeback of
+                         * just the first cacheline before invalidating the
+                         * overall buffer. Any mbuf in a chain may have this
+                         * misalignment. Buffers which are not mbufs bounce if
+                         * they are not aligned to a cacheline.
+                         */
+                        dma_preread_safe(va, len);
+                        break;
+                case BUS_DMASYNC_POSTREAD:
+                case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
+                        cpu_dcache_inv_range(va, len);
+                        break;
+                default:
+                        panic("unsupported combination of sync operations: "
+                            "0x%08x\n", op);
+                }
+
+                if (tempva != 0)
+                        pmap_quick_remove_page(tempva);
+        }
 }

 static void
@@ -797,15 +952,9 @@ bounce_bus_dmamap_sync(bus_dma_tag_t dma
     bus_dmasync_op_t op)
 {
         struct bounce_page *bpage;
+        struct sync_list *sl, *end;
         vm_offset_t datavaddr, tempvaddr;

-        /*
-         * XXX ARM64TODO:
-         * This bus_dma implementation requires IO-Coherent architecutre.
-         * If IO-Coherency is not guaranteed, cache operations have to be
-         * added to this function.
-         */
-
         if ((op & BUS_DMASYNC_POSTREAD) != 0) {
                 /*
                  * Wait for any DMA operations to complete before the bcopy.
@@ -832,13 +981,26 @@ bounce_bus_dmamap_sync(bus_dma_tag_t dma
                             (void *)bpage->vaddr, bpage->datacount);
                         if (tempvaddr != 0)
                                 pmap_quick_remove_page(tempvaddr);
+                        if ((dmat->bounce_flags & BF_COHERENT) == 0)
+                                cpu_dcache_wb_range(bpage->vaddr,
+                                    bpage->datacount);
                         bpage = STAILQ_NEXT(bpage, links);
                 }
                 dmat->bounce_zone->total_bounced++;
+        } else if ((op & BUS_DMASYNC_PREREAD) != 0) {
+                while (bpage != NULL) {
+                        if ((dmat->bounce_flags & BF_COHERENT) == 0)
+                                cpu_dcache_wbinv_range(bpage->vaddr,
+                                    bpage->datacount);
+                        bpage = STAILQ_NEXT(bpage, links);
+                }
         }

         if ((op & BUS_DMASYNC_POSTREAD) != 0) {
                 while (bpage != NULL) {
+                        if ((dmat->bounce_flags & BF_COHERENT) == 0)
+                                cpu_dcache_inv_range(bpage->vaddr,
+                                    bpage->datacount);
                         tempvaddr = 0;
                         datavaddr = bpage->datavaddr;
                         if (datavaddr == 0) {
@@ -858,7 +1020,20 @@ bounce_bus_dmamap_sync(bus_dma_tag_t dma
                 }
         }

-        if ((op & BUS_DMASYNC_PREWRITE) != 0) {
+        /*
+         * Cache maintenance for normal (non-COHERENT non-bounce) buffers.
+         */
+        if (map->sync_count != 0) {
+                sl = &map->slist[0];
+                end = &map->slist[map->sync_count];
+                CTR3(KTR_BUSDMA, "%s: tag %p op 0x%x "
+                    "performing sync", __func__, dmat, op);
+
+                for ( ; sl != end; ++sl)
+                        dma_dcache_sync(sl, op);
+        }
+
+        if ((op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE)) != 0) {
                 /*
                  * Wait for the bcopy to complete before any DMA operations.
                  */

Modified: head/sys/arm64/include/cpufunc.h
==============================================================================
--- head/sys/arm64/include/cpufunc.h	Fri May 13 15:57:55 2016	(r299682)
+++ head/sys/arm64/include/cpufunc.h	Fri May 13 16:03:50 2016	(r299683)
@@ -119,6 +119,11 @@ clrex(void)
         __asm __volatile("clrex" : : : "memory");
 }

+extern int64_t dcache_line_size;
+extern int64_t icache_line_size;
+extern int64_t idcache_line_size;
+extern int64_t dczva_line_size;
+
 #define cpu_nullop()                    arm64_nullop()
 #define cpufunc_nullop()                arm64_nullop()
 #define cpu_setttb(a)                   arm64_setttb(a)
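
For context, here is a minimal driver-side sketch of how the coherent path is meant to be used once the "#ifdef notyet" guard above is removed. It is illustrative only and not part of r299683: the "mydev" names, softc layout, and buffer sizes are hypothetical, while the busdma calls themselves are the standard bus_dma(9) API. A tag created with BUS_DMA_COHERENT, or derived from a parent carrying BF_COHERENT, skips the new dcache maintenance; any other tag records a sync_list entry per segment and gets the PREREAD/PREWRITE/POSTREAD cache operations performed by dma_dcache_sync() above.

/*
 * Illustrative fragment only -- not part of this commit.  Names with a
 * "mydev" prefix are hypothetical.
 */
#include <sys/param.h>
#include <sys/bus.h>
#include <machine/bus.h>

struct mydev_softc {
        device_t         dev;
        bus_dma_tag_t    tag;
        bus_dmamap_t     map;
        void            *buf;
        bus_addr_t       busaddr;
};

static void
mydev_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
        if (error == 0)
                *(bus_addr_t *)arg = segs[0].ds_addr;
}

static int
mydev_setup_dma(struct mydev_softc *sc, int coherent)
{
        int error;

        /*
         * With the "notyet" block enabled, BUS_DMA_COHERENT sets
         * BF_COHERENT on the new tag, and child tags inherit it from a
         * coherent parent, so bounce_bus_dmamap_sync() will skip the
         * dcache maintenance for maps created from this tag.
         */
        error = bus_dma_tag_create(bus_get_dma_tag(sc->dev),
            1, 0,                       /* alignment, boundary */
            BUS_SPACE_MAXADDR,          /* lowaddr */
            BUS_SPACE_MAXADDR,          /* highaddr */
            NULL, NULL,                 /* filter, filterarg */
            PAGE_SIZE, 1, PAGE_SIZE,    /* maxsize, nsegments, maxsegsz */
            coherent ? BUS_DMA_COHERENT : 0,
            NULL, NULL,                 /* lockfunc, lockarg */
            &sc->tag);
        if (error != 0)
                return (error);

        error = bus_dmamem_alloc(sc->tag, &sc->buf,
            BUS_DMA_WAITOK | BUS_DMA_ZERO, &sc->map);
        if (error != 0)
                return (error);

        return (bus_dmamap_load(sc->tag, sc->map, sc->buf, PAGE_SIZE,
            mydev_load_cb, &sc->busaddr, BUS_DMA_NOWAIT));
}

static void
mydev_start_read(struct mydev_softc *sc)
{
        /* Device is about to DMA into the buffer. */
        bus_dmamap_sync(sc->tag, sc->map, BUS_DMASYNC_PREREAD);
        /* ... program the device with sc->busaddr ... */
}

static void
mydev_read_done(struct mydev_softc *sc)
{
        /* Device has finished writing; the CPU may now read sc->buf. */
        bus_dmamap_sync(sc->tag, sc->map, BUS_DMASYNC_POSTREAD);
}

The PREREAD sync before starting the transfer is what drives dma_preread_safe() (write back the partially owned first and last cachelines, then invalidate), and the POSTREAD sync performs the final invalidate; with a coherent tag both calls become cheap because the sync_list loop is never entered.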