From owner-svn-src-all@FreeBSD.ORG  Mon Aug 11 01:10:16 2014
From: Ian Lepore
Date: Mon, 11 Aug 2014 01:10:16 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject: svn commit: r269794 - stable/10/sys/arm/arm
Message-Id: <53e817f8.2b97.d400ca7@svn.freebsd.org>

Author: ian
Date: Mon Aug 11 01:10:15 2014
New Revision: 269794
URL: http://svnweb.freebsd.org/changeset/base/269794

Log:
  MFC r269206, r269207, r269208, r269209, r269210, r269211, r269212,
  r269213, r269214, r269215, r269216, r269217, r269221:

  busdma-v6 improvements, primarily:

  - Allocate the temporary segments array per-map rather than per-tag.
  - Avoid needlessly bouncing IO for mbufs and buffers allocated by
    bus_dmamem_alloc() (in both situations we know they're allocated on
    cacheline boundaries and don't need bouncing).
  - Various minor reformatting and cleanups.

Modified:
  stable/10/sys/arm/arm/busdma_machdep-v6.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/arm/arm/busdma_machdep-v6.c
==============================================================================
--- stable/10/sys/arm/arm/busdma_machdep-v6.c  Mon Aug 11 00:03:50 2014  (r269793)
+++ stable/10/sys/arm/arm/busdma_machdep-v6.c  Mon Aug 11 01:10:15 2014  (r269794)
@@ -64,7 +64,10 @@ __FBSDID("$FreeBSD$"); #include #define MAX_BPAGES 64 -#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 +#define MAX_DMA_SEGMENTS 4096 +#define BUS_DMA_EXCL_BOUNCE BUS_DMA_BUS2 +#define BUS_DMA_ALIGN_BOUNCE BUS_DMA_BUS3 +#define BUS_DMA_COULD_BOUNCE (BUS_DMA_EXCL_BOUNCE | BUS_DMA_ALIGN_BOUNCE) #define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 struct bounce_zone; @@ -94,15 +97,6 @@ struct bus_dma_tag { */ struct arm32_dma_range *ranges; int _nranges; - /* - * Most tags need one or two segments, and can use the local tagsegs - * array. For tags with a larger limit, we'll allocate a bigger array - * on first use.
- */ - bus_dma_segment_t *segments; - bus_dma_segment_t tagsegs[2]; - - }; struct bounce_page { @@ -143,9 +137,40 @@ struct bounce_zone { static struct mtx bounce_lock; static int total_bpages; static int busdma_zonecount; +static uint32_t tags_total; +static uint32_t maps_total; +static uint32_t maps_dmamem; +static uint32_t maps_coherent; +static uint64_t maploads_total; +static uint64_t maploads_bounced; +static uint64_t maploads_coherent; +static uint64_t maploads_dmamem; +static uint64_t maploads_mbuf; +static uint64_t maploads_physmem; + static STAILQ_HEAD(, bounce_zone) bounce_zone_list; SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); +SYSCTL_UINT(_hw_busdma, OID_AUTO, tags_total, CTLFLAG_RD, &tags_total, 0, + "Number of active tags"); +SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_total, CTLFLAG_RD, &maps_total, 0, + "Number of active maps"); +SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_dmamem, CTLFLAG_RD, &maps_dmamem, 0, + "Number of active maps for bus_dmamem_alloc buffers"); +SYSCTL_UINT(_hw_busdma, OID_AUTO, maps_coherent, CTLFLAG_RD, &maps_coherent, 0, + "Number of active maps with BUS_DMA_COHERENT flag set"); +SYSCTL_UQUAD(_hw_busdma, OID_AUTO, maploads_total, CTLFLAG_RD, &maploads_total, 0, + "Number of load operations performed"); +SYSCTL_UQUAD(_hw_busdma, OID_AUTO, maploads_bounced, CTLFLAG_RD, &maploads_bounced, 0, + "Number of load operations that used bounce buffers"); +SYSCTL_UQUAD(_hw_busdma, OID_AUTO, maploads_coherent, CTLFLAG_RD, &maploads_dmamem, 0, + "Number of load operations on BUS_DMA_COHERENT memory"); +SYSCTL_UQUAD(_hw_busdma, OID_AUTO, maploads_dmamem, CTLFLAG_RD, &maploads_dmamem, 0, + "Number of load operations on bus_dmamem_alloc buffers"); +SYSCTL_UQUAD(_hw_busdma, OID_AUTO, maploads_mbuf, CTLFLAG_RD, &maploads_mbuf, 0, + "Number of load operations for mbufs"); +SYSCTL_UQUAD(_hw_busdma, OID_AUTO, maploads_physmem, CTLFLAG_RD, &maploads_physmem, 0, + "Number of load operations on physical buffers"); SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, "Total bounce pages"); @@ -160,7 +185,10 @@ struct bus_dmamap { void *callback_arg; int flags; #define DMAMAP_COHERENT (1 << 0) +#define DMAMAP_DMAMEM_ALLOC (1 << 1) +#define DMAMAP_MBUF (1 << 2) STAILQ_ENTRY(bus_dmamap) links; + bus_dma_segment_t *segments; int sync_count; struct sync_list slist[]; }; @@ -177,7 +205,6 @@ static bus_addr_t add_bounce_page(bus_dm vm_offset_t vaddr, bus_addr_t addr, bus_size_t size); static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); -int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr, bus_size_t size, int coherent); static void _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, int flags); static void _bus_dmamap_count_phys(bus_dma_tag_t dmat, bus_dmamap_t map, @@ -229,62 +256,135 @@ busdma_init(void *dummy) */ SYSINIT(busdma, SI_SUB_KMEM, SI_ORDER_THIRD, busdma_init, NULL); -static __inline int -_bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr) +static int +exclusion_bounce_check(vm_offset_t lowaddr, vm_offset_t highaddr) { int i; for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) { - if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1]) - || (lowaddr < phys_avail[i] && - highaddr > phys_avail[i])) + if ((lowaddr >= phys_avail[i] && lowaddr < phys_avail[i + 1]) || + (lowaddr < phys_avail[i] && highaddr >= phys_avail[i])) return (1); } return (0); } -static __inline struct arm32_dma_range * -_bus_dma_inrange(struct arm32_dma_range *ranges, int 
nranges, - bus_addr_t curaddr) +/* + * Return true if the tag has an exclusion zone that could lead to bouncing. + */ +static __inline int +exclusion_bounce(bus_dma_tag_t dmat) { - struct arm32_dma_range *dr; - int i; - for (i = 0, dr = ranges; i < nranges; i++, dr++) { - if (curaddr >= dr->dr_sysbase && - round_page(curaddr) <= (dr->dr_sysbase + dr->dr_len)) - return (dr); - } + return (dmat->flags & BUS_DMA_EXCL_BOUNCE); +} - return (NULL); +/* + * Return true if the given address does not fall on the alignment boundary. + */ +static __inline int +alignment_bounce(bus_dma_tag_t dmat, bus_addr_t addr) +{ + + return (addr & (dmat->alignment - 1)); +} + +/* + * Return true if the DMA should bounce because the start or end does not fall + * on a cacheline boundary (which would require a partial cacheline flush). + * COHERENT memory doesn't trigger cacheline flushes. Memory allocated by + * bus_dmamem_alloc() is always aligned to cacheline boundaries, and there's a + * strict rule that such memory cannot be accessed by the CPU while DMA is in + * progress (or by multiple DMA engines at once), so that it's always safe to do + * full cacheline flushes even if that affects memory outside the range of a + * given DMA operation that doesn't involve the full allocated buffer. If we're + * mapping an mbuf, that follows the same rules as a buffer we allocated. + */ +static __inline int +cacheline_bounce(bus_dmamap_t map, bus_addr_t addr, bus_size_t size) +{ + + if (map->flags & (DMAMAP_DMAMEM_ALLOC | DMAMAP_COHERENT | DMAMAP_MBUF)) + return (0); + return ((addr | size) & arm_dcache_align_mask); } /* - * Return true if a match is made. + * Return true if we might need to bounce the DMA described by addr and size. * - * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. + * This is used to quick-check whether we need to do the more expensive work of + * checking the DMA page-by-page looking for alignment and exclusion bounces. * - * If paddr is within the bounds of the dma tag then call the filter callback - * to check for a match, if there is no filter callback then assume a match. + * Note that the addr argument might be either virtual or physical. It doesn't + * matter because we only look at the low-order bits, which are the same in both + * address spaces. */ -int -run_filter(bus_dma_tag_t dmat, bus_addr_t paddr, bus_size_t size, int coherent) +static __inline int +might_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t addr, + bus_size_t size) { - int retval; + return ((dmat->flags & BUS_DMA_EXCL_BOUNCE) || + alignment_bounce(dmat, addr) || + cacheline_bounce(map, addr, size)); +} - retval = 0; +/* + * Return true if we must bounce the DMA described by paddr and size. + * + * Bouncing can be triggered by DMA that doesn't begin and end on cacheline + * boundaries, or doesn't begin on an alignment boundary, or falls within the + * exclusion zone of any tag in the ancestry chain. + * + * For exclusions, walk the chain of tags comparing paddr to the exclusion zone + * within each tag. If the tag has a filter function, use it to decide whether + * the DMA needs to bounce, otherwise any DMA within the zone bounces. 
+ */ +static int +must_bounce(bus_dma_tag_t dmat, bus_dmamap_t map, bus_addr_t paddr, + bus_size_t size) +{ - do { - if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) - || ((paddr & (dmat->alignment - 1)) != 0) || - (!coherent && (size & arm_dcache_align_mask)) || - (!coherent && (paddr & arm_dcache_align_mask))) - && (dmat->filter == NULL - || (*dmat->filter)(dmat->filterarg, paddr) != 0)) - retval = 1; + if (cacheline_bounce(map, paddr, size)) + return (1); + /* + * The tag already contains ancestors' alignment restrictions so this + * check doesn't need to be inside the loop. + */ + if (alignment_bounce(dmat, paddr)) + return (1); + + /* + * Even though each tag has an exclusion zone that is a superset of its + * own and all its ancestors' exclusions, the exclusion zone of each tag + * up the chain must be checked within the loop, because the busdma + * rules say the filter function is called only when the address lies + * within the low-highaddr range of the tag that filterfunc belongs to. + */ + while (dmat != NULL && exclusion_bounce(dmat)) { + if ((paddr >= dmat->lowaddr && paddr <= dmat->highaddr) && + (dmat->filter == NULL || + dmat->filter(dmat->filterarg, paddr) != 0)) + return (1); dmat = dmat->parent; - } while (retval == 0 && dmat != NULL); - return (retval); + } + + return (0); +} + +static __inline struct arm32_dma_range * +_bus_dma_inrange(struct arm32_dma_range *ranges, int nranges, + bus_addr_t curaddr) +{ + struct arm32_dma_range *dr; + int i; + + for (i = 0, dr = ranges; i < nranges; i++, dr++) { + if (curaddr >= dr->dr_sysbase && + round_page(curaddr) <= (dr->dr_sysbase + dr->dr_len)) + return (dr); + } + + return (NULL); } /* @@ -384,34 +484,21 @@ bus_dma_tag_create(bus_dma_tag_t parent, newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } - /* - * If all the segments we need fit into the local tagsegs array, set the - * pointer now. Otherwise NULL the pointer and an array of segments - * will be allocated later, on first use. We don't pre-allocate now - * because some tags exist just to pass contraints to children in the - * device hierarchy, and they tend to use BUS_SPACE_UNRESTRICTED and we - * sure don't want to try to allocate an array for that. 
- */ - if (newtag->nsegments <= nitems(newtag->tagsegs)) - newtag->segments = newtag->tagsegs; - else - newtag->segments = NULL; /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); + newtag->alignment = MAX(parent->alignment, newtag->alignment); + newtag->flags |= parent->flags & BUS_DMA_COULD_BOUNCE; if (newtag->boundary == 0) newtag->boundary = parent->boundary; else if (parent->boundary != 0) newtag->boundary = MIN(parent->boundary, newtag->boundary); - if ((newtag->filter != NULL) || - ((parent->flags & BUS_DMA_COULD_BOUNCE) != 0)) - newtag->flags |= BUS_DMA_COULD_BOUNCE; if (newtag->filter == NULL) { /* - * Short circuit looking at our parent directly + * Short circuit to looking at our parent directly * since we have encapsulated all of its information */ newtag->filter = parent->filter; @@ -422,9 +509,10 @@ bus_dma_tag_create(bus_dma_tag_t parent, atomic_add_int(&parent->ref_count, 1); } - if (_bus_dma_can_bounce(newtag->lowaddr, newtag->highaddr) - || newtag->alignment > 1) - newtag->flags |= BUS_DMA_COULD_BOUNCE; + if (exclusion_bounce_check(newtag->lowaddr, newtag->highaddr)) + newtag->flags |= BUS_DMA_EXCL_BOUNCE; + if (alignment_bounce(newtag, 1)) + newtag->flags |= BUS_DMA_ALIGN_BOUNCE; /* * Any request can auto-bounce due to cacheline alignment, in addition @@ -463,6 +551,7 @@ bus_dma_tag_create(bus_dma_tag_t parent, if (error != 0) { free(newtag, M_DEVBUF); } else { + atomic_add_32(&tags_total, 1); *dmat = newtag; } CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", @@ -492,9 +581,7 @@ bus_dma_tag_destroy(bus_dma_tag_t dmat) parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { - if (dmat->segments != NULL && - dmat->segments != dmat->tagsegs) - free(dmat->segments, M_DEVBUF); + atomic_subtract_32(&tags_total, 1); free(dmat, M_DEVBUF); /* * Last reference count, so @@ -534,8 +621,8 @@ static int allocate_bz_and_pages(bus_dma maxpages = MAX_BPAGES; else maxpages = 2 * bz->map_count; - if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 - || (bz->map_count > 0 && bz->total_bpages < maxpages)) { + if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 || + (bz->map_count > 0 && bz->total_bpages < maxpages)) { int pages; pages = atop(roundup2(dmat->maxsize, PAGE_SIZE)) + 1; @@ -551,6 +638,31 @@ static int allocate_bz_and_pages(bus_dma return (0); } +static bus_dmamap_t +allocate_map(bus_dma_tag_t dmat, int mflags) +{ + int mapsize, segsize; + bus_dmamap_t map; + + /* + * Allocate the map. The map structure ends with an embedded + * variable-sized array of sync_list structures. Following that + * we allocate enough extra space to hold the array of bus_dma_segments. + */ + KASSERT(dmat->nsegments <= MAX_DMA_SEGMENTS, + ("cannot allocate %u dma segments (max is %u)", + dmat->nsegments, MAX_DMA_SEGMENTS)); + segsize = sizeof(struct bus_dma_segment) * dmat->nsegments; + mapsize = sizeof(*map) + sizeof(struct sync_list) * dmat->nsegments; + map = malloc(mapsize + segsize, M_DEVBUF, mflags | M_ZERO); + if (map == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); + return (NULL); + } + map->segments = (bus_dma_segment_t *)((uintptr_t)map + mapsize); + return (map); +} + /* * Allocate a handle for mapping from kva/uva/physical * address space into bus device space. 
@@ -558,41 +670,32 @@ static int allocate_bz_and_pages(bus_dma int bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) { - int mapsize; + bus_dmamap_t map; int error = 0; - mapsize = sizeof(**mapp) + (sizeof(struct sync_list) * dmat->nsegments); - *mapp = (bus_dmamap_t)malloc(mapsize, M_DEVBUF, M_NOWAIT | M_ZERO); - if (*mapp == NULL) { + *mapp = map = allocate_map(dmat, M_NOWAIT); + if (map == NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM); return (ENOMEM); } - (*mapp)->sync_count = 0; - if (dmat->segments == NULL) { - dmat->segments = (bus_dma_segment_t *)malloc( - sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - M_NOWAIT); - if (dmat->segments == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); - free(*mapp, M_DEVBUF); - *mapp = NULL; - return (ENOMEM); - } - } /* - * Bouncing might be required if the driver asks for an active - * exclusion region, a data alignment that is stricter than 1, and/or - * an active address boundary. + * Bouncing might be required if the driver asks for an exclusion + * region, a data alignment that is stricter than 1, or DMA that begins + * or ends with a partial cacheline. Whether bouncing will actually + * happen can't be known until mapping time, but we need to pre-allocate + * resources now because we might not be allowed to at mapping time. */ - error = allocate_bz_and_pages(dmat, *mapp); + error = allocate_bz_and_pages(dmat, map); if (error != 0) { - free(*mapp, M_DEVBUF); + free(map, M_DEVBUF); *mapp = NULL; return (error); } - return (error); + if (map->flags & DMAMAP_COHERENT) + atomic_add_32(&maps_coherent, 1); + atomic_add_32(&maps_total, 1); + return (0); } /* @@ -609,6 +712,9 @@ bus_dmamap_destroy(bus_dma_tag_t dmat, b } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; + if (map->flags & DMAMAP_COHERENT) + atomic_subtract_32(&maps_coherent, 1); + atomic_subtract_32(&maps_total, 1); free(map, M_DEVBUF); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); @@ -627,58 +733,33 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, voi { busdma_bufalloc_t ba; struct busdma_bufzone *bufzone; + bus_dmamap_t map; vm_memattr_t memattr; int mflags; - int mapsize; - int error; if (flags & BUS_DMA_NOWAIT) mflags = M_NOWAIT; else mflags = M_WAITOK; + if (flags & BUS_DMA_ZERO) + mflags |= M_ZERO; - /* ARM non-snooping caches need a map for the VA cache sync structure */ - - mapsize = sizeof(**mapp) + (sizeof(struct sync_list) * dmat->nsegments); - *mapp = (bus_dmamap_t)malloc(mapsize, M_DEVBUF, M_NOWAIT | M_ZERO); - if (*mapp == NULL) { + *mapp = map = allocate_map(dmat, mflags); + if (map == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); return (ENOMEM); } + map->flags = DMAMAP_DMAMEM_ALLOC; - (*mapp)->sync_count = 0; - /* We may need bounce pages, even for allocated memory */ - error = allocate_bz_and_pages(dmat, *mapp); - if (error != 0) { - free(*mapp, M_DEVBUF); - *mapp = NULL; - return (error); - } - - if (dmat->segments == NULL) { - dmat->segments = (bus_dma_segment_t *)malloc( - sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - mflags); - if (dmat->segments == NULL) { - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, ENOMEM); - free(*mapp, M_DEVBUF); - *mapp = NULL; - return (ENOMEM); - } - } - - if (flags & BUS_DMA_ZERO) - mflags |= M_ZERO; + /* Choose a busdma buffer allocator based on memory type flags. 
*/ if (flags & BUS_DMA_COHERENT) { memattr = VM_MEMATTR_UNCACHEABLE; ba = coherent_allocator; - (*mapp)->flags |= DMAMAP_COHERENT; + map->flags |= DMAMAP_COHERENT; } else { memattr = VM_MEMATTR_DEFAULT; ba = standard_allocator; - (*mapp)->flags = 0; } /* @@ -702,7 +783,7 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, voi * constraints is something that only the contig allocator can fulfill. */ if (bufzone != NULL && dmat->alignment <= bufzone->size && - !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) { + !exclusion_bounce(dmat)) { *vaddr = uma_zalloc(bufzone->umazone, mflags); } else if (dmat->nsegments >= btoc(dmat->maxsize) && dmat->alignment <= PAGE_SIZE && dmat->boundary == 0) { @@ -718,12 +799,16 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, voi if (*vaddr == NULL) { CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, ENOMEM); - free(*mapp, M_DEVBUF); + free(map, M_DEVBUF); *mapp = NULL; return (ENOMEM); } else if ((uintptr_t)*vaddr & (dmat->alignment - 1)) { printf("bus_dmamem_alloc failed to align memory properly.\n"); } + if (map->flags & DMAMAP_COHERENT) + atomic_add_32(&maps_coherent, 1); + atomic_add_32(&maps_dmamem, 1); + atomic_add_32(&maps_total, 1); dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", @@ -751,12 +836,16 @@ bus_dmamem_free(bus_dma_tag_t dmat, void bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize); if (bufzone != NULL && dmat->alignment <= bufzone->size && - !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) + !exclusion_bounce(dmat)) uma_zfree(bufzone->umazone, vaddr); else kmem_free(kernel_arena, (vm_offset_t)vaddr, dmat->maxsize); dmat->map_count--; + if (map->flags & DMAMAP_COHERENT) + atomic_subtract_32(&maps_coherent, 1); + atomic_subtract_32(&maps_total, 1); + atomic_subtract_32(&maps_dmamem, 1); free(map, M_DEVBUF); CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); } @@ -780,8 +869,7 @@ _bus_dmamap_count_phys(bus_dma_tag_t dma curaddr = buf; while (buflen != 0) { sgsize = MIN(buflen, dmat->maxsegsz); - if (run_filter(dmat, curaddr, sgsize, - map->flags & DMAMAP_COHERENT) != 0) { + if (must_bounce(dmat, map, curaddr, sgsize) != 0) { sgsize = MIN(sgsize, PAGE_SIZE); map->pagesneeded++; } @@ -817,10 +905,9 @@ _bus_dmamap_count_pages(bus_dma_tag_t dm paddr = pmap_kextract(vaddr); else paddr = pmap_extract(map->pmap, vaddr); - if (run_filter(dmat, paddr, - min(vendaddr - vaddr, - (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK))), - map->flags & DMAMAP_COHERENT) != 0) { + if (must_bounce(dmat, map, paddr, + min(vendaddr - vaddr, (PAGE_SIZE - ((vm_offset_t)vaddr & + PAGE_MASK)))) != 0) { map->pagesneeded++; } vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK)); @@ -934,12 +1021,15 @@ _bus_dmamap_load_phys(bus_dma_tag_t dmat int error; if (segs == NULL) - segs = dmat->segments; + segs = map->segments; + + maploads_total++; + maploads_physmem++; - if (((map->flags & DMAMAP_COHERENT) == 0) || - (dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { + if (might_bounce(dmat, map, buflen, buflen)) { _bus_dmamap_count_phys(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { + maploads_bounced++; error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); @@ -949,10 +1039,8 @@ _bus_dmamap_load_phys(bus_dma_tag_t dmat while (buflen > 0) { curaddr = buf; sgsize = MIN(buflen, dmat->maxsegsz); - if ((((map->flags & DMAMAP_COHERENT) == 0) || - ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0)) && - map->pagesneeded != 0 && run_filter(dmat, curaddr, - sgsize, map->flags & 
DMAMAP_COHERENT)) { + if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, + sgsize)) { sgsize = MIN(sgsize, PAGE_SIZE); curaddr = add_bounce_page(dmat, map, 0, curaddr, sgsize); @@ -1004,15 +1092,26 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dm struct sync_list *sl; int error; + maploads_total++; + if (map->flags & DMAMAP_COHERENT) + maploads_coherent++; + if (map->flags & DMAMAP_DMAMEM_ALLOC) + maploads_dmamem++; + if (segs == NULL) - segs = dmat->segments; + segs = map->segments; + + if (flags & BUS_DMA_LOAD_MBUF) { + maploads_mbuf++; + map->flags |= DMAMAP_MBUF; + } map->pmap = pmap; - if (!(map->flags & DMAMAP_COHERENT) || - (dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) { + if (might_bounce(dmat, map, (bus_addr_t)buf, buflen)) { _bus_dmamap_count_pages(dmat, map, buf, buflen, flags); if (map->pagesneeded != 0) { + maploads_bounced++; error = _bus_dmamap_reserve_pages(dmat, map, flags); if (error) return (error); @@ -1040,10 +1139,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dm if (buflen < sgsize) sgsize = buflen; - if ((((map->flags & DMAMAP_COHERENT) == 0) || - ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0)) && - map->pagesneeded != 0 && run_filter(dmat, curaddr, - sgsize, map->flags & DMAMAP_COHERENT)) { + if (map->pagesneeded != 0 && must_bounce(dmat, map, curaddr, + sgsize)) { curaddr = add_bounce_page(dmat, map, vaddr, curaddr, sgsize); } else { @@ -1100,7 +1197,7 @@ _bus_dmamap_complete(bus_dma_tag_t dmat, { if (segs == NULL) - segs = dmat->segments; + segs = map->segments; return (segs); } @@ -1126,6 +1223,7 @@ _bus_dmamap_unload(bus_dma_tag_t dmat, b map->pagesneeded = 0; } map->sync_count = 0; + map->flags &= ~DMAMAP_MBUF; } #ifdef notyetbounceuser @@ -1204,12 +1302,12 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus while (bpage != NULL) { if (bpage->datavaddr != 0) bcopy((void *)bpage->datavaddr, - (void *)bpage->vaddr, - bpage->datacount); + (void *)bpage->vaddr, + bpage->datacount); else physcopyout(bpage->dataaddr, - (void *)bpage->vaddr, - bpage->datacount); + (void *)bpage->vaddr, + bpage->datacount); cpu_dcache_wb_range((vm_offset_t)bpage->vaddr, bpage->datacount); l2cache_wb_range((vm_offset_t)bpage->vaddr, @@ -1251,12 +1349,12 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus l2cache_inv_range(startv, startp, len); if (bpage->datavaddr != 0) bcopy((void *)bpage->vaddr, - (void *)bpage->datavaddr, - bpage->datacount); + (void *)bpage->datavaddr, + bpage->datacount); else physcopyin((void *)bpage->vaddr, - bpage->dataaddr, - bpage->datacount); + bpage->dataaddr, + bpage->datacount); bpage = STAILQ_NEXT(bpage, links); } dmat->bounce_zone->total_bounced++; @@ -1345,8 +1443,8 @@ alloc_bounce_zone(bus_dma_tag_t dmat) /* Check to see if we already have a suitable zone */ STAILQ_FOREACH(bz, &bounce_zone_list, links) { - if ((dmat->alignment <= bz->alignment) - && (dmat->lowaddr >= bz->lowaddr)) { + if ((dmat->alignment <= bz->alignment) && + (dmat->lowaddr >= bz->lowaddr)) { dmat->bounce_zone = bz; return (0); } @@ -1397,7 +1495,7 @@ alloc_bounce_zone(bus_dma_tag_t dmat) SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, - "Total bounce requests"); + "Total bounce requests (pages bounced)"); SYSCTL_ADD_INT(busdma_sysctl_tree(bz), SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, @@ -1424,15 +1522,12 @@ alloc_bounce_pages(bus_dma_tag_t dmat, u struct bounce_page *bpage; bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, - 
M_NOWAIT | M_ZERO); + M_NOWAIT | M_ZERO); if (bpage == NULL) break; bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, - M_NOWAIT, 0ul, - bz->lowaddr, - PAGE_SIZE, - 0); + M_NOWAIT, 0ul, bz->lowaddr, PAGE_SIZE, 0); if (bpage->vaddr == 0) { free(bpage, M_DEVBUF); break; @@ -1538,7 +1633,7 @@ free_bounce_page(bus_dma_tag_t dmat, str if (reserve_bounce_pages(map->dmat, map, 1) == 0) { STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); STAILQ_INSERT_TAIL(&bounce_map_callbacklist, - map, links); + map, links); busdma_swi_pending = 1; bz->total_deferred++; swi_sched(vm_ih, 0); @@ -1558,10 +1653,10 @@ busdma_swi(void) STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); mtx_unlock(&bounce_lock); dmat = map->dmat; - (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); + dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_LOCK); bus_dmamap_load_mem(map->dmat, map, &map->mem, map->callback, - map->callback_arg, BUS_DMA_WAITOK); - (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); + map->callback_arg, BUS_DMA_WAITOK); + dmat->lockfunc(dmat->lockfuncarg, BUS_DMA_UNLOCK); mtx_lock(&bounce_lock); } mtx_unlock(&bounce_lock);
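
For readers who want to see how these changes surface through the busdma KPI, here is a minimal, hypothetical driver-side sketch (not part of the committed diff). It shows the two load paths the log message singles out: a descriptor ring obtained from bus_dmamem_alloc(), whose map is flagged DMAMAP_DMAMEM_ALLOC, and an mbuf load, which is flagged DMAMAP_MBUF at load time; with this change neither is bounced merely to avoid partial-cacheline flushes, and callback-style loads now draw their temporary bus_dma_segment array from the map (map->segments) rather than from the tag. The device name, softc layout, tag parameters, and helper names (mydev_softc, mydev_dma_setup, mydev_dmamap_cb) are invented for illustration; the busdma calls themselves are the standard FreeBSD interfaces.

/*
 * Hypothetical driver fragment, assuming a single-page descriptor ring and a
 * single mbuf cluster per RX slot.  A sketch only, not tested code.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <machine/bus.h>

struct mydev_softc {
	bus_dma_tag_t	desc_tag;	/* descriptor ring, bus_dmamem_alloc'd */
	bus_dmamap_t	desc_map;
	void		*desc_ring;
	bus_addr_t	desc_busaddr;
	bus_dma_tag_t	rx_tag;		/* per-packet mbuf loads */
	bus_dmamap_t	rx_map;
};

static void
mydev_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	/* Single-segment callback: just record the bus address. */
	if (error == 0 && nseg == 1)
		*(bus_addr_t *)arg = segs[0].ds_addr;
}

static int
mydev_dma_setup(device_t dev, struct mydev_softc *sc)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	int error, nsegs;

	/*
	 * Descriptor ring: bus_dmamem_alloc() memory is cacheline aligned, so
	 * after this change a load of it is never bounced just because of
	 * cacheline alignment (the map carries DMAMAP_DMAMEM_ALLOC).  The
	 * callback-style load below uses the segment array embedded in the
	 * map by allocate_map().
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev),
	    PAGE_SIZE, 0,			/* alignment, boundary */
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
	    NULL, NULL,				/* no filter */
	    PAGE_SIZE, 1, PAGE_SIZE,		/* maxsize, nsegments, maxsegsz */
	    0, NULL, NULL, &sc->desc_tag);
	if (error != 0)
		return (error);
	error = bus_dmamem_alloc(sc->desc_tag, &sc->desc_ring,
	    BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->desc_map);
	if (error != 0)
		return (error);
	error = bus_dmamap_load(sc->desc_tag, sc->desc_map, sc->desc_ring,
	    PAGE_SIZE, mydev_dmamap_cb, &sc->desc_busaddr, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);

	/*
	 * Mbuf path: bus_dmamap_load_mbuf_sg() passes BUS_DMA_LOAD_MBUF down
	 * to _bus_dmamap_load_buffer(), so the map is flagged DMAMAP_MBUF and
	 * likewise skips cacheline bouncing (mbuf storage is cacheline
	 * aligned).  Exclusion-zone and alignment bouncing still apply.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0,
	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
	    MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rx_tag);
	if (error != 0)
		return (error);
	error = bus_dmamap_create(sc->rx_tag, 0, &sc->rx_map);
	if (error != 0)
		return (error);
	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
	if (m == NULL)
		return (ENOBUFS);
	m->m_len = m->m_pkthdr.len = MCLBYTES;
	error = bus_dmamap_load_mbuf_sg(sc->rx_tag, sc->rx_map, m,
	    &seg, &nsegs, BUS_DMA_NOWAIT);
	if (error == 0)
		bus_dmamap_sync(sc->rx_tag, sc->rx_map, BUS_DMASYNC_PREREAD);
	return (error);
}

Either load in the sketch would previously have bounced on a non-coherent map whenever the buffer did not start and end on cacheline boundaries; the hw.busdma.maploads_* sysctls added by this commit make it easy to observe how often the dmamem, mbuf, and bounced paths are actually taken.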