Date: Sun, 3 May 2026 21:30:08 -0700 From: Ryan Libby <rlibby@freebsd.org> To: Andrew Turner <andrew@freebsd.org> Cc: src-committers@freebsd.org, dev-commits-src-all@freebsd.org, dev-commits-src-main@freebsd.org, Sarah Walker <sarah.walker2@arm.com> Subject: Re: git: c16c95192f01 - main - virtio_net: Use bus_dma for rxq/txq buffers Message-ID: <CAHgpiFzYNMQ_t6f_ZRQipwSgYAyUB29XCrrz-8FnkWnukEFcMg@mail.gmail.com> In-Reply-To: <69ef4acf.1fcd2.4d4d3e54@gitrepo.freebsd.org>
index | next in thread | previous in thread | raw e-mail
On Mon, Apr 27, 2026 at 4:39 AM Andrew Turner <andrew@freebsd.org> wrote: > > The branch main has been updated by andrew: > > URL: https://cgit.FreeBSD.org/src/commit/?id=c16c95192f01237a876eb7bc336e3bbda9310171 > > commit c16c95192f01237a876eb7bc336e3bbda9310171 > Author: Sarah Walker <sarah.walker2@arm.com> > AuthorDate: 2026-02-16 14:19:13 +0000 > Commit: Andrew Turner <andrew@FreeBSD.org> > CommitDate: 2026-04-27 11:37:53 +0000 > > virtio_net: Use bus_dma for rxq/txq buffers > > While the majority of virtio platforms will be fully coherent, some may > require cache maintenance or other specific device memory handling (eg for > secure partitioning). Using bus_dma allows for these usecases. > > The virtio buffers are marked as coherent; this should ensure that sync > calls are no-ops in the common cases. > > Reviewed by: andrew > Sponsored by: Arm Ltd > Differential Revision: https://reviews.freebsd.org/D55492 > --- > sys/dev/virtio/network/if_vtnet.c | 275 ++++++++++++++++++++++++++++++++--- > sys/dev/virtio/network/if_vtnetvar.h | 10 ++ > 2 files changed, 268 insertions(+), 17 deletions(-) > > diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c > index ef01833b9e03..e88602a44664 100644 > --- a/sys/dev/virtio/network/if_vtnet.c > +++ b/sys/dev/virtio/network/if_vtnet.c > @@ -96,6 +96,17 @@ > #define VTNET_ETHER_ALIGN ETHER_ALIGN > #endif > > +/* > + * Worst case offset to ensure header doesn't share any cache lines with > + * payload. 
> + */ > +#define VTNET_RX_BUFFER_HEADER_OFFSET 128 > + > +struct vtnet_rx_buffer_header { > + bus_addr_t addr; > + bus_dmamap_t dmap; > +}; > + > static int vtnet_modevent(module_t, int, void *); > > static int vtnet_probe(device_t); > @@ -384,6 +395,17 @@ MODULE_DEPEND(vtnet, netmap, 1, 1, 1); > > VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter"); > > +static struct vtnet_rx_buffer_header * > +vtnet_mbuf_to_rx_buffer_header(struct vtnet_softc *sc, struct mbuf *m) > +{ > + if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) > + return (struct vtnet_rx_buffer_header *)((uintptr_t)m->m_data - > + VTNET_RX_BUFFER_HEADER_OFFSET - VTNET_ETHER_ALIGN); > + else > + return (struct vtnet_rx_buffer_header *)((uintptr_t)m->m_data - > + VTNET_RX_BUFFER_HEADER_OFFSET); > +} > + > static int > vtnet_modevent(module_t mod __unused, int type, void *unused __unused) > { > @@ -457,6 +479,60 @@ vtnet_attach(device_t dev) > goto fail; > } > > + mtx_init(&sc->vtnet_rx_mtx, device_get_nameunit(dev), > + "VirtIO Net RX lock", MTX_DEF); > + > + error = bus_dma_tag_create( > + bus_get_dma_tag(dev), /* parent */ > + sizeof(uint16_t), /* alignment */ > + 0, /* boundary */ > + BUS_SPACE_MAXADDR, /* lowaddr */ > + BUS_SPACE_MAXADDR, /* highaddr */ > + NULL, NULL, /* filter, filterarg */ > + MJUM9BYTES, /* max request size */ > + 1, /* max # segments */ > + MJUM9BYTES, /* maxsegsize - worst case */ > + BUS_DMA_COHERENT, /* flags */ > + busdma_lock_mutex, /* lockfunc */ > + &sc->vtnet_rx_mtx, /* lockarg */ > + &sc->vtnet_rx_dmat); > + if (error) { > + device_printf(dev, "cannot create bus_dma_tag\n"); > + goto fail; > + } > + > + mtx_init(&sc->vtnet_tx_mtx, device_get_nameunit(dev), > + "VirtIO Net TX lock", MTX_DEF); > + > + error = bus_dma_tag_create( > + bus_get_dma_tag(dev), /* parent */ > + sizeof(uint16_t), /* alignment */ > + 0, /* boundary */ > + BUS_SPACE_MAXADDR, /* lowaddr */ > + BUS_SPACE_MAXADDR, /* highaddr */ > + NULL, NULL, /* filter, filterarg 
*/ > + sc->vtnet_tx_nsegs * MJUM9BYTES, /* max request size */ > + sc->vtnet_tx_nsegs, /* max # segments */ > + MJUM9BYTES, /* maxsegsize */ > + BUS_DMA_COHERENT, /* flags */ > + busdma_lock_mutex, /* lockfunc */ > + &sc->vtnet_tx_mtx, /* lockarg */ > + &sc->vtnet_tx_dmat); > + if (error) { > + device_printf(dev, "cannot create bus_dma_tag\n"); > + goto fail; > + } > + > +#ifdef __powerpc__ > + /* > + * Virtio uses physical addresses rather than bus addresses, so we > + * need to ask busdma to skip the iommu physical->bus mapping. At > + * present, this is only a thing on the powerpc architectures. > + */ > + bus_dma_tag_set_iommu(sc->vtnet_rx_dmat, NULL, NULL); > + bus_dma_tag_set_iommu(sc->vtnet_tx_dmat, NULL, NULL); > +#endif > + > error = vtnet_alloc_rx_filters(sc); > if (error) { > device_printf(dev, "cannot allocate Rx filters\n"); > @@ -1545,6 +1621,11 @@ static struct mbuf * > vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) > { > struct mbuf *m_head, *m_tail, *m; > + struct vtnet_rx_buffer_header *vthdr; > + bus_dma_segment_t segs[1]; > + bus_dmamap_t dmap; > + int nsegs; > + int err; > int i, size; > > m_head = NULL; > @@ -1562,13 +1643,43 @@ vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) > } > > m->m_len = size; > + vthdr = (struct vtnet_rx_buffer_header *)m->m_data; > + > + /* Reserve space for header */ > + m_adj(m, VTNET_RX_BUFFER_HEADER_OFFSET); > + > /* > * Need to offset the mbuf if the header we're going to add > * will misalign. 
> */ > - if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) { > + if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) > m_adj(m, VTNET_ETHER_ALIGN); > + > + err = bus_dmamap_create(sc->vtnet_rx_dmat, 0, &dmap); > + if (err) { > + printf("Failed to create dmamap, err :%d\n", > + err); > + m_freem(m); > + return (NULL); > } > + > + nsegs = 0; > + err = bus_dmamap_load_mbuf_sg(sc->vtnet_rx_dmat, dmap, m, segs, > + &nsegs, BUS_DMA_NOWAIT); > + if (err != 0) { > + printf("Failed to map mbuf into DMA visible memory, err: %d\n", > + err); > + m_freem(m); > + bus_dmamap_destroy(sc->vtnet_rx_dmat, dmap); > + return (NULL); > + } > + KASSERT(nsegs == 1, > + ("%s: unexpected number of DMA segments for rx buffer: %d", > + __func__, nsegs)); > + > + vthdr->addr = segs[0].ds_addr; > + vthdr->dmap = dmap; > + > if (m_head != NULL) { > m_tail->m_next = m; > m_tail = m; > @@ -1594,7 +1705,7 @@ vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0, > int len, clustersz, nreplace, error; > > sc = rxq->vtnrx_sc; > - clustersz = sc->vtnet_rx_clustersz; > + clustersz = sc->vtnet_rx_clustersz - VTNET_RX_BUFFER_HEADER_OFFSET; > /* > * Need to offset the mbuf if the header we're going to add will > * misalign, account for that here. 
> @@ -1709,9 +1820,12 @@ vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) > static int > vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) > { > + struct vtnet_rx_buffer_header *hdr; > struct vtnet_softc *sc; > struct sglist *sg; > int header_inlined, error; > + bus_addr_t paddr; > + struct mbuf *mp; > > sc = rxq->vtnrx_sc; > sg = rxq->vtnrx_sg; > @@ -1724,28 +1838,38 @@ vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) > header_inlined = vtnet_modern(sc) || > (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */ > > + hdr = vtnet_mbuf_to_rx_buffer_header(sc, m); > + paddr = hdr->addr; > + > /* > * Note: The mbuf has been already adjusted when we allocate it if we > * have to do strict alignment. > */ > - if (header_inlined) > - error = sglist_append_mbuf(sg, m); > - else { > - struct vtnet_rx_header *rxhdr = > - mtod(m, struct vtnet_rx_header *); > + if (header_inlined) { > + error = sglist_append_phys(sg, paddr, m->m_len); > + } else { > MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); > > /* Append the header and remaining mbuf data. 
*/ > - error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); > + error = sglist_append_phys(sg, paddr, sc->vtnet_hdr_size); > if (error) > return (error); > - error = sglist_append(sg, &rxhdr[1], > + error = sglist_append_phys(sg, > + paddr + sizeof(struct vtnet_rx_header), > m->m_len - sizeof(struct vtnet_rx_header)); > if (error) > return (error); > > - if (m->m_next != NULL) > - error = sglist_append_mbuf(sg, m->m_next); > + mp = m->m_next; > + while (mp) { > + hdr = vtnet_mbuf_to_rx_buffer_header(sc, mp); > + paddr = hdr->addr; > + error = sglist_append_phys(sg, paddr, mp->m_len); > + if (error) > + return (error); > + > + mp = mp->m_next; > + } > } > > if (error) > @@ -1931,6 +2055,7 @@ vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) > m_tail = m_head; > > while (--nbufs > 0) { > + struct vtnet_rx_buffer_header *vthdr; > struct mbuf *m; > uint32_t len; > > @@ -1940,6 +2065,10 @@ vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) > goto fail; > } > > + vthdr = vtnet_mbuf_to_rx_buffer_header(sc, m); > + bus_dmamap_sync(sc->vtnet_rx_dmat, vthdr->dmap, > + BUS_DMASYNC_POSTREAD); > + > if (vtnet_rxq_new_buf(rxq) != 0) { > rxq->vtnrx_stats.vrxs_iqdrops++; > vtnet_rxq_discard_buf(rxq, m); > @@ -2060,6 +2189,7 @@ static int > vtnet_rxq_eof(struct vtnet_rxq *rxq) > { > struct virtio_net_hdr lhdr, *hdr; > + struct vtnet_rx_buffer_header *vthdr; > struct vtnet_softc *sc; > if_t ifp; > struct virtqueue *vq; > @@ -2075,14 +2205,31 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq) > > CURVNET_SET(if_getvnet(ifp)); > while (count-- > 0) { > - struct mbuf *m; > + struct mbuf *m, *mp; > uint32_t len, nbufs, adjsz; > + uint32_t synced; > > m = virtqueue_dequeue(vq, &len); > if (m == NULL) > break; > deq++; > > + mp = m; > + > + /* > + * Sync all mbufs in this packet. There will only be a single > + * mbuf unless LRO is in use. 
> + */ > + synced = 0; > + while (mp && synced < len) { > + vthdr = vtnet_mbuf_to_rx_buffer_header(sc, mp); > + bus_dmamap_sync(sc->vtnet_rx_dmat, vthdr->dmap, > + BUS_DMASYNC_POSTREAD); > + > + synced += mp->m_len; > + mp = mp->m_next; > + } > + > if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { > rxq->vtnrx_stats.vrxs_ierrors++; > vtnet_rxq_discard_buf(rxq, m); > @@ -2342,6 +2489,14 @@ vtnet_txq_free_mbufs(struct vtnet_txq *txq) > > while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { > if (kring == NULL) { > + bus_dmamap_unload(txq->vtntx_sc->vtnet_tx_dmat, > + txhdr->dmap); > + bus_dmamap_destroy(txq->vtntx_sc->vtnet_tx_dmat, > + txhdr->dmap); > + bus_dmamap_unload(txq->vtntx_sc->vtnet_tx_dmat, > + txhdr->hdr_dmap); > + bus_dmamap_destroy(txq->vtntx_sc->vtnet_tx_dmat, > + txhdr->hdr_dmap); > m_freem(txhdr->vth_mbuf); > uma_zfree(vtnet_tx_header_zone, txhdr); > } > @@ -2511,15 +2666,36 @@ drop: > return (NULL); > } > > +static void > +vtnet_txq_enqueue_callback(void *arg, bus_dma_segment_t *segs, > + int nsegs, int error) > +{ > + vm_paddr_t *hdr_paddr; > + > + if (error != 0) > + return; > + > + KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs)); > + > + hdr_paddr = (vm_paddr_t *)arg; > + *hdr_paddr = segs[0].ds_addr; > +} > + > static int > vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, > struct vtnet_tx_header *txhdr) > { > + bus_dma_segment_t segs[VTNET_TX_SEGS_MAX]; > + int nsegs; > struct vtnet_softc *sc; > struct virtqueue *vq; > struct sglist *sg; > struct mbuf *m; > int error; > + vm_paddr_t hdr_paddr; > + bus_dmamap_t hdr_dmap; > + bus_dmamap_t dmap; > + int i; > > sc = txq->vtntx_sc; > vq = txq->vtntx_vq; > @@ -2527,15 +2703,55 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, > m = *m_head; > > sglist_reset(sg); > - error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); > + > + error = bus_dmamap_create(sc->vtnet_tx_dmat, 0, &hdr_dmap); > + if (error) > + goto fail; > + > + error = 
bus_dmamap_load(sc->vtnet_tx_dmat, hdr_dmap, &txhdr->vth_uhdr, > + sc->vtnet_hdr_size, vtnet_txq_enqueue_callback, &hdr_paddr, > + BUS_DMA_NOWAIT); > + if (error) > + goto fail_hdr_dmamap_destroy; > + > + error = sglist_append_phys(sg, hdr_paddr, sc->vtnet_hdr_size); > if (error != 0 || sg->sg_nseg != 1) { > KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d", > __func__, error, sg->sg_nseg)); > - goto fail; > + goto fail_hdr_dmamap_unload; > } > > - error = sglist_append_mbuf(sg, m); > + bus_dmamap_sync(sc->vtnet_tx_dmat, hdr_dmap, BUS_DMASYNC_PREWRITE); > + > + error = bus_dmamap_create(sc->vtnet_tx_dmat, 0, &dmap); > + if (error) > + goto fail_hdr_dmamap_unload; > + > + nsegs = 0; > + error = bus_dmamap_load_mbuf_sg(sc->vtnet_tx_dmat, dmap, m, segs, > + &nsegs, BUS_DMA_NOWAIT); > + if (error != 0) > + goto fail_dmamap_destroy; > + KASSERT(nsegs <= sc->vtnet_tx_nsegs, > + ("%s: unexpected number of DMA segments for tx buffer: %d (max %d)", > + __func__, nsegs, sc->vtnet_tx_nsegs)); > + > + bus_dmamap_sync(sc->vtnet_tx_dmat, dmap, BUS_DMASYNC_PREWRITE); > + > + for (i = 0; i < nsegs && !error; i++) > + error = sglist_append_phys(sg, segs[i].ds_addr, segs[i].ds_len); > + > if (error) { > + sglist_reset(sg); > + bus_dmamap_unload(sc->vtnet_tx_dmat, dmap); > + > + error = sglist_append_phys(sg, hdr_paddr, sc->vtnet_hdr_size); > + if (error != 0 || sg->sg_nseg != 1) { > + KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d", > + __func__, error, sg->sg_nseg)); > + goto fail_dmamap_destroy; > + } > + > m = m_defrag(m, M_NOWAIT); > if (m == NULL) { > sc->vtnet_stats.tx_defrag_failed++; > @@ -2545,16 +2761,41 @@ vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, > *m_head = m; > sc->vtnet_stats.tx_defragged++; > > - error = sglist_append_mbuf(sg, m); > + nsegs = 0; > + error = bus_dmamap_load_mbuf_sg(sc->vtnet_tx_dmat, dmap, m, > + segs, &nsegs, BUS_DMA_NOWAIT); > + if (error != 0) > + goto fail_dmamap_destroy; > + KASSERT(nsegs <= 
sc->vtnet_tx_nsegs, > + ("%s: unexpected number of DMA segments for tx buffer: %d (max %d)", > + __func__, nsegs, sc->vtnet_tx_nsegs)); > + > + bus_dmamap_sync(sc->vtnet_tx_dmat, dmap, BUS_DMASYNC_PREWRITE); > + > + for (i = 0; i < nsegs && !error; i++) > + error = sglist_append_phys(sg, segs[i].ds_addr, > + segs[i].ds_len); > + > if (error) > - goto fail; > + goto fail_dmamap_unload; > } > > txhdr->vth_mbuf = m; > + txhdr->dmap = dmap; > + txhdr->hdr_dmap = hdr_dmap; > + > error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); > > return (error); > > +fail_dmamap_unload: > + bus_dmamap_unload(sc->vtnet_tx_dmat, dmap); > +fail_dmamap_destroy: > + bus_dmamap_destroy(sc->vtnet_tx_dmat, dmap); > +fail_hdr_dmamap_unload: > + bus_dmamap_unload(sc->vtnet_tx_dmat, hdr_dmap); > +fail_hdr_dmamap_destroy: > + bus_dmamap_destroy(sc->vtnet_tx_dmat, hdr_dmap); > fail: > m_freem(*m_head); > *m_head = NULL; > diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h > index eb5e6784b07f..6cafe827d733 100644 > --- a/sys/dev/virtio/network/if_vtnetvar.h > +++ b/sys/dev/virtio/network/if_vtnetvar.h > @@ -190,6 +190,12 @@ struct vtnet_softc { > struct mtx vtnet_mtx; > char vtnet_mtx_name[16]; > uint8_t vtnet_hwaddr[ETHER_ADDR_LEN]; > + > + bus_dma_tag_t vtnet_rx_dmat; > + struct mtx vtnet_rx_mtx; > + > + bus_dma_tag_t vtnet_tx_dmat; > + struct mtx vtnet_tx_mtx; > }; > /* vtnet flag descriptions for use with printf(9) %b identifier. */ > #define VTNET_FLAGS_BITS \ > @@ -273,6 +279,10 @@ struct vtnet_tx_header { > } vth_uhdr; > > struct mbuf *vth_mbuf; > + > + bus_dmamap_t dmap; > + > + bus_dmamap_t hdr_dmap; > }; > > /* > I believe this is causing a massive memory leak in the devbuf malloc type. I can't even get through a build over nfs on a VM without hitting OOM. Reverting b5bad6df467cc95bea641afe674c55cd5b9f1510 and c16c95192f01237a876eb7bc336e3bbda9310171 fixes the leak. Can you please fix or revert this? 
You can monitor the leak with something like vmstat -m | grep devbuf > @@ -1562,13 +1643,43 @@ vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) > } > > m->m_len = size; > + vthdr = (struct vtnet_rx_buffer_header *)m->m_data; > + > + /* Reserve space for header */ > + m_adj(m, VTNET_RX_BUFFER_HEADER_OFFSET); > + > /* > * Need to offset the mbuf if the header we're going to add > * will misalign. > */ > - if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) { > + if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) > m_adj(m, VTNET_ETHER_ALIGN); > + > + err = bus_dmamap_create(sc->vtnet_rx_dmat, 0, &dmap); > + if (err) { > + printf("Failed to create dmamap, err :%d\n", > + err); > + m_freem(m); > + return (NULL); > } > + > + nsegs = 0; > + err = bus_dmamap_load_mbuf_sg(sc->vtnet_rx_dmat, dmap, m, segs, > + &nsegs, BUS_DMA_NOWAIT); Where are the bus_dmamap_unload and bus_dmamap_destroy for the rx bufs?

Ryan

home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAHgpiFzYNMQ_t6f_ZRQipwSgYAyUB29XCrrz-8FnkWnukEFcMg>
