Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 27 Dec 2014 01:28:52 +0000 (UTC)
From:      Ian Lepore <ian@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r276274 - stable/10/sys/arm/arm
Message-ID:  <201412270128.sBR1Sq4J050987@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: ian
Date: Sat Dec 27 01:28:52 2014
New Revision: 276274
URL: https://svnweb.freebsd.org/changeset/base/276274

Log:
  MFC r274538, r274545, r274596, r274602, r274603, r274604, r274605, r274839:
  
    When doing busdma sync ops for BUSDMA_COHERENT memory, there is no need
    for cache maintenance operations, but ensure that all prior writes have
    reached memory when doing a PREWRITE sync.
  
    Do not do a cache invalidate on a PREREAD sync that is also a PREWRITE sync.
  
    Do the cache invalidate sequence from the outermost to innermost, required
    for correct operation.
  
    Correct the sequence of busdma sync ops involved with PRE/POSTREAD syncs.
  
    When doing a PREREAD sync of an mbuf-type dma buffer, do a writeback of
    the first cacheline if the buffer start address is not on a cacheline
    boundary.

Modified:
  stable/10/sys/arm/arm/busdma_machdep-v6.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/arm/arm/busdma_machdep-v6.c
==============================================================================
--- stable/10/sys/arm/arm/busdma_machdep-v6.c	Sat Dec 27 01:06:19 2014	(r276273)
+++ stable/10/sys/arm/arm/busdma_machdep-v6.c	Sat Dec 27 01:28:52 2014	(r276274)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2012 Ian Lepore
+ * Copyright (c) 2012-2014 Ian Lepore
  * Copyright (c) 2010 Mark Tinguely
  * Copyright (c) 2004 Olivier Houchard
  * Copyright (c) 2002 Peter Grehan
@@ -322,6 +322,7 @@ static __inline int
 might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr, 
     bus_size_t size)
 {
+
 	return ((dmat->flags & BUS_DMA_EXCL_BOUNCE) ||
 	    alignment_bounce(dmat, addr) ||
 	    cacheline_bounce(map, addr, size));
@@ -420,6 +421,7 @@ busdma_lock_mutex(void *arg, bus_dma_loc
 static void
 dflt_lock(void *arg, bus_dma_lock_op_t op)
 {
+
 	panic("driver error: busdma dflt_lock called");
 }
 
@@ -600,7 +602,7 @@ out:
 
 static int allocate_bz_and_pages(bus_dma_tag_t dmat, bus_dmamap_t mapp)
 {
-        struct bounce_zone *bz;
+	struct bounce_zone *bz;
 	int maxpages;
 	int error;
 		
@@ -1227,13 +1229,13 @@ _bus_dmamap_unload(bus_dma_tag_t dmat, b
 }
 
 #ifdef notyetbounceuser
-	/* If busdma uses user pages, then the interrupt handler could
-	 * be use the kernel vm mapping. Both bounce pages and sync list
-	 * do not cross page boundaries.
-	 * Below is a rough sequence that a person would do to fix the
-	 * user page reference in the kernel vmspace. This would be
-	 * done in the dma post routine.
-	 */
+/* If busdma uses user pages, then the interrupt handler could
+ * be use the kernel vm mapping. Both bounce pages and sync list
+ * do not cross page boundaries.
+ * Below is a rough sequence that a person would do to fix the
+ * user page reference in the kernel vmspace. This would be
+ * done in the dma post routine.
+ */
 void
 _bus_dmamap_fix_user(vm_offset_t buf, bus_size_t len,
 			pmap_t pmap, int op)
@@ -1242,10 +1244,10 @@ _bus_dmamap_fix_user(vm_offset_t buf, bu
 	bus_addr_t curaddr;
 	vm_offset_t va;
 
-		/* each synclist entry is contained within a single page.
-		 *
-		 * this would be needed if BUS_DMASYNC_POSTxxxx was implemented
-		*/
+	/* 
+	 * each synclist entry is contained within a single page.
+	 * this would be needed if BUS_DMASYNC_POSTxxxx was implemented
+	 */
 	curaddr = pmap_extract(pmap, buf);
 	va = pmap_dma_map(curaddr);
 	switch (op) {
@@ -1287,17 +1289,20 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus
 	/*
 	 * If the buffer was from user space, it is possible that this is not
 	 * the same vm map, especially on a POST operation.  It's not clear that
-	 * dma on userland buffers can work at all right now, certainly not if a
-	 * partial cacheline flush has to be handled.  To be safe, until we're
-	 * able to test direct userland dma, panic on a map mismatch.
+	 * dma on userland buffers can work at all right now.  To be safe, until
+	 * we're able to test direct userland dma, panic on a map mismatch.
 	 */
 	if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) {
 		if (!pmap_dmap_iscurrent(map->pmap))
 			panic("_bus_dmamap_sync: wrong user map for bounce sync.");
-		/* Handle data bouncing. */
+
 		CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x "
 		    "performing bounce", __func__, dmat, dmat->flags, op);
 
+		/*
+		 * For PREWRITE do a writeback.  Clean the caches from the
+		 * innermost to the outermost levels.
+		 */
 		if (op & BUS_DMASYNC_PREWRITE) {
 			while (bpage != NULL) {
 				if (bpage->datavaddr != 0)
@@ -1309,7 +1314,7 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus
 					    (void *)bpage->vaddr,
 					    bpage->datacount);
 				cpu_dcache_wb_range((vm_offset_t)bpage->vaddr,
-					bpage->datacount);
+				    bpage->datacount);
 				l2cache_wb_range((vm_offset_t)bpage->vaddr,
 				    (vm_offset_t)bpage->busaddr, 
 				    bpage->datacount);
@@ -1318,7 +1323,18 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus
 			dmat->bounce_zone->total_bounced++;
 		}
 
-		if (op & BUS_DMASYNC_PREREAD) {
+		/*
+		 * Do an invalidate for PREREAD unless a writeback was already
+		 * done above due to PREWRITE also being set.  The reason for a
+		 * PREREAD invalidate is to prevent dirty lines currently in the
+		 * cache from being evicted during the DMA.  If a writeback was
+		 * done due to PREWRITE also being set there will be no dirty
+		 * lines and the POSTREAD invalidate handles the rest. The
+		 * invalidate is done from the innermost to outermost level. If
+		 * L2 were done first, a dirty cacheline could be automatically
+		 * evicted from L1 before we invalidated it, re-dirtying the L2.
+		 */
+		if ((op & BUS_DMASYNC_PREREAD) && !(op & BUS_DMASYNC_PREWRITE)) {
 			bpage = STAILQ_FIRST(&map->bpages);
 			while (bpage != NULL) {
 				cpu_dcache_inv_range((vm_offset_t)bpage->vaddr,
@@ -1329,6 +1345,16 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus
 				bpage = STAILQ_NEXT(bpage, links);
 			}
 		}
+
+		/*
+		 * Re-invalidate the caches on a POSTREAD, even though they were
+		 * already invalidated at PREREAD time.  Aggressive prefetching
+		 * due to accesses to other data near the dma buffer could have
+		 * brought buffer data into the caches which is now stale.  The
+		 * caches are invalidated from the outermost to innermost; the
+		 * prefetches could be happening right now, and if L1 were
+		 * invalidated first, stale L2 data could be prefetched into L1.
+		 */
 		if (op & BUS_DMASYNC_POSTREAD) {
 			while (bpage != NULL) {
 				vm_offset_t startv;
@@ -1345,8 +1371,8 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus
 					len = (len -
 					    (len & arm_dcache_align_mask)) +
 					    arm_dcache_align;
-				cpu_dcache_inv_range(startv, len);
 				l2cache_inv_range(startv, startp, len);
+				cpu_dcache_inv_range(startv, len);
 				if (bpage->datavaddr != 0)
 					bcopy((void *)bpage->vaddr,
 					    (void *)bpage->datavaddr,
@@ -1360,13 +1386,33 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus
 			dmat->bounce_zone->total_bounced++;
 		}
 	}
-	if (map->flags & DMAMAP_COHERENT)
+
+	/*
+	 * For COHERENT memory no cache maintenance is necessary, but ensure all
+	 * writes have reached memory for the PREWRITE case.  No action is
+	 * needed for a PREREAD without PREWRITE also set, because that would
+	 * imply that the cpu had written to the COHERENT buffer and expected
+	 * the dma device to see that change, and by definition a PREWRITE sync
+	 * is required to make that happen.
+	 */
+	if (map->flags & DMAMAP_COHERENT) {
+		if (op & BUS_DMASYNC_PREWRITE) {
+			dsb();
+			cpu_l2cache_drain_writebuf();
+		}
 		return;
+	}
 
+	/*
+	 * Cache maintenance for normal (non-COHERENT non-bounce) buffers.  All
+	 * the comments about the sequences for flushing cache levels in the
+	 * bounce buffer code above apply here as well.  In particular, the fact
+	 * that the sequence is inner-to-outer for PREREAD invalidation and
+	 * outer-to-inner for POSTREAD invalidation is not a mistake.
+	 */
 	if (map->sync_count != 0) {
 		if (!pmap_dmap_iscurrent(map->pmap))
 			panic("_bus_dmamap_sync: wrong user map for sync.");
-		/* ARM caches are not self-snooping for dma */
 
 		sl = &map->slist[0];
 		end = &map->slist[map->sync_count];
@@ -1375,16 +1421,34 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus
 
 		switch (op) {
 		case BUS_DMASYNC_PREWRITE:
+		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
 			while (sl != end) {
-			    cpu_dcache_wb_range(sl->vaddr, sl->datacount);
-			    l2cache_wb_range(sl->vaddr, sl->busaddr,
-				sl->datacount);
-			    sl++;
+				cpu_dcache_wb_range(sl->vaddr, sl->datacount);
+				l2cache_wb_range(sl->vaddr, sl->busaddr,
+				    sl->datacount);
+				sl++;
 			}
 			break;
 
 		case BUS_DMASYNC_PREREAD:
+			/*
+			 * An mbuf may start in the middle of a cacheline. There
+			 * will be no cpu writes to the beginning of that line
+			 * (which contains the mbuf header) while dma is in
+			 * progress.  Handle that case by doing a writeback of
+			 * just the first cacheline before invalidating the
+			 * overall buffer.  Any mbuf in a chain may have this
+			 * misalignment.  Buffers which are not mbufs bounce if
+			 * they are not aligned to a cacheline.
+			 */
 			while (sl != end) {
+				if (sl->vaddr & arm_dcache_align_mask) {
+					KASSERT(map->flags & DMAMAP_MBUF,
+					    ("unaligned buffer is not an mbuf"));
+					cpu_dcache_wb_range(sl->vaddr, 1);
+					l2cache_wb_range(sl->vaddr,
+					    sl->busaddr, 1);
+				}
 				cpu_dcache_inv_range(sl->vaddr, sl->datacount);
 				l2cache_inv_range(sl->vaddr, sl->busaddr, 
 				    sl->datacount);
@@ -1392,19 +1456,19 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus
 			}
 			break;
 
-		case BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD:
-			while (sl != end) {
-				cpu_dcache_wbinv_range(sl->vaddr, sl->datacount);
-				l2cache_wbinv_range(sl->vaddr,
-				    sl->busaddr, sl->datacount);
-				sl++;
-			}
+		case BUS_DMASYNC_POSTWRITE:
 			break;
 
 		case BUS_DMASYNC_POSTREAD:
-		case BUS_DMASYNC_POSTWRITE:
 		case BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE:
+			while (sl != end) {
+				l2cache_inv_range(sl->vaddr, sl->busaddr, 
+				    sl->datacount);
+				cpu_dcache_inv_range(sl->vaddr, sl->datacount);
+				sl++;
+			}
 			break;
+
 		default:
 			panic("unsupported combination of sync operations: 0x%08x\n", op);
 			break;
@@ -1427,12 +1491,14 @@ SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_AN
 static struct sysctl_ctx_list *
 busdma_sysctl_tree(struct bounce_zone *bz)
 {
+
 	return (&bz->sysctl_tree);
 }
 
 static struct sysctl_oid *
 busdma_sysctl_tree_top(struct bounce_zone *bz)
 {
+
 	return (bz->sysctl_tree_top);
 }
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201412270128.sBR1Sq4J050987>