Date: Tue, 9 Jan 2024 19:09:50 GMT From: John Baldwin <jhb@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: a9b615120628 - main - memdesc: Helper function to construct mbuf chain backed by memdesc buffer Message-ID: <202401091909.409J9orQ099743@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by jhb: URL: https://cgit.FreeBSD.org/src/commit/?id=a9b615120628411cc20af730edb291c52be02f66 commit a9b615120628411cc20af730edb291c52be02f66 Author: John Baldwin <jhb@FreeBSD.org> AuthorDate: 2024-01-09 19:05:03 +0000 Commit: John Baldwin <jhb@FreeBSD.org> CommitDate: 2024-01-09 19:09:43 +0000 memdesc: Helper function to construct mbuf chain backed by memdesc buffer memdesc_alloc_ext_mbufs constructs a chain of external (M_EXT or M_EXTPG) mbufs backed by a data buffer described by a memory descriptor. Since memory descriptors are not an actual buffer just a description of a buffer, the caller is required to supply a couple of helper routines to manage allocation of the raw mbufs and associating them with a reference to the underlying buffer. Reviewed by: markj Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D42933 --- sys/kern/subr_memdesc.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++ sys/sys/memdesc.h | 36 ++++ 2 files changed, 510 insertions(+) diff --git a/sys/kern/subr_memdesc.c b/sys/kern/subr_memdesc.c index 9ba9d7fe031b..ff8aad7731cd 100644 --- a/sys/kern/subr_memdesc.c +++ b/sys/kern/subr_memdesc.c @@ -33,9 +33,14 @@ #include <sys/uio.h> #include <vm/vm.h> #include <vm/pmap.h> +#include <vm/vm_page.h> #include <vm/vm_param.h> #include <machine/bus.h> +/* + * memdesc_copyback copies data from a source buffer into a buffer + * described by a memory descriptor. + */ static void phys_copyback(vm_paddr_t pa, int off, int size, const void *src) { @@ -180,6 +185,10 @@ memdesc_copyback(struct memdesc *mem, int off, int size, const void *src) } } +/* + * memdesc_copydata copies data from a buffer described by a memory + * descriptor into a destination buffer. + */ static void phys_copydata(vm_paddr_t pa, int off, int size, void *dst) { @@ -323,3 +332,468 @@ memdesc_copydata(struct memdesc *mem, int off, int size, void *dst) __assert_unreachable(); } } + +/* + * memdesc_alloc_ext_mbufs allocates a chain of external mbufs backed + * by the storage of a memory descriptor's data buffer. + */ +static struct mbuf * +vaddr_ext_mbuf(memdesc_alloc_ext_mbuf_t *ext_alloc, void *cb_arg, int how, + void *buf, size_t len, size_t *actual_len) +{ + *actual_len = len; + return (ext_alloc(cb_arg, how, buf, len)); +} + +static bool +can_append_paddr(struct mbuf *m, vm_paddr_t pa) +{ + u_int last_len; + + /* Can always append to an empty mbuf. */ + if (m->m_epg_npgs == 0) + return (true); + + /* Can't append to a full mbuf. */ + if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) + return (false); + + /* Can't append a non-page-aligned address to a non-empty mbuf. */ + if ((pa & PAGE_MASK) != 0) + return (false); + + /* Can't append if the last page is not a full page. */ + last_len = m->m_epg_last_len; + if (m->m_epg_npgs == 1) + last_len += m->m_epg_1st_off; + return (last_len == PAGE_SIZE); +} + +/* + * Returns amount of data added to an M_EXTPG mbuf. + */ +static size_t +append_paddr_range(struct mbuf *m, vm_paddr_t pa, size_t len) +{ + size_t appended; + + appended = 0; + + /* Append the first page. */ + if (m->m_epg_npgs == 0) { + m->m_epg_pa[0] = trunc_page(pa); + m->m_epg_npgs = 1; + m->m_epg_1st_off = pa & PAGE_MASK; + m->m_epg_last_len = PAGE_SIZE - m->m_epg_1st_off; + if (m->m_epg_last_len > len) + m->m_epg_last_len = len; + m->m_len = m->m_epg_last_len; + len -= m->m_epg_last_len; + pa += m->m_epg_last_len; + appended += m->m_epg_last_len; + } + KASSERT(len == 0 || (pa & PAGE_MASK) == 0, + ("PA not aligned before full pages")); + + /* Full pages. */ + while (len >= PAGE_SIZE && m->m_epg_npgs < MBUF_PEXT_MAX_PGS) { + m->m_epg_pa[m->m_epg_npgs] = pa; + m->m_epg_npgs++; + m->m_epg_last_len = PAGE_SIZE; + m->m_len += PAGE_SIZE; + pa += PAGE_SIZE; + len -= PAGE_SIZE; + appended += PAGE_SIZE; + } + + /* Final partial page. */ + if (len > 0 && m->m_epg_npgs < MBUF_PEXT_MAX_PGS) { + KASSERT(len < PAGE_SIZE, ("final page is full page")); + m->m_epg_pa[m->m_epg_npgs] = pa; + m->m_epg_npgs++; + m->m_epg_last_len = len; + m->m_len += len; + appended += len; + } + + return (appended); +} + +static struct mbuf * +paddr_ext_mbuf(memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how, + vm_paddr_t pa, size_t len, size_t *actual_len, bool can_truncate) +{ + struct mbuf *m, *tail; + size_t appended; + + if (can_truncate) { + vm_paddr_t end; + + /* + * Trim any partial page at the end, but not if it's + * the only page. + */ + end = trunc_page(pa + len); + if (end > pa) + len = end - pa; + } + *actual_len = len; + + m = tail = extpg_alloc(cb_arg, how); + if (m == NULL) + return (NULL); + while (len > 0) { + if (!can_append_paddr(tail, pa)) { + MBUF_EXT_PGS_ASSERT_SANITY(tail); + tail->m_next = extpg_alloc(cb_arg, how); + if (tail->m_next == NULL) + goto error; + tail = tail->m_next; + } + + appended = append_paddr_range(tail, pa, len); + KASSERT(appended > 0, ("did not append anything")); + KASSERT(appended <= len, ("appended too much")); + + pa += appended; + len -= appended; + } + + MBUF_EXT_PGS_ASSERT_SANITY(tail); + return (m); +error: + m_freem(m); + return (NULL); +} + +static struct mbuf * +vlist_ext_mbuf(memdesc_alloc_ext_mbuf_t *ext_alloc, void *cb_arg, int how, + struct bus_dma_segment *vlist, u_int sglist_cnt, size_t offset, + size_t len, size_t *actual_len) +{ + struct mbuf *m, *n, *tail; + size_t todo; + + *actual_len = len; + + while (vlist->ds_len <= offset) { + KASSERT(sglist_cnt > 1, ("out of sglist entries")); + + offset -= vlist->ds_len; + vlist++; + sglist_cnt--; + } + + m = tail = NULL; + while (len > 0) { + KASSERT(sglist_cnt >= 1, ("out of sglist entries")); + + todo = len; + if (todo > vlist->ds_len - offset) + todo = vlist->ds_len - offset; + + n = ext_alloc(cb_arg, how, (char *)(uintptr_t)vlist->ds_addr + + offset, todo); + if (n == NULL) + goto error; + + if (m == NULL) { + m = n; + tail = m; + } else { + tail->m_next = n; + tail = n; + } + + offset = 0; + vlist++; + sglist_cnt--; + len -= todo; + } + + return (m); +error: + m_freem(m); + return (NULL); +} + +static struct mbuf * +plist_ext_mbuf(memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how, + struct bus_dma_segment *plist, u_int sglist_cnt, size_t offset, size_t len, + size_t *actual_len, bool can_truncate) +{ + vm_paddr_t pa; + struct mbuf *m, *tail; + size_t appended, totlen, todo; + + while (plist->ds_len <= offset) { + KASSERT(sglist_cnt > 1, ("out of sglist entries")); + + offset -= plist->ds_len; + plist++; + sglist_cnt--; + } + + totlen = 0; + m = tail = extpg_alloc(cb_arg, how); + if (m == NULL) + return (NULL); + while (len > 0) { + KASSERT(sglist_cnt >= 1, ("out of sglist entries")); + + pa = plist->ds_addr + offset; + todo = len; + if (todo > plist->ds_len - offset) + todo = plist->ds_len - offset; + + /* + * If truncation is enabled, avoid sending a final + * partial page, but only if there is more data + * available in the current segment. Also, at least + * some data must be sent, so only drop the final page + * for this segment if the segment spans multiple + * pages or some other data is already queued. + */ + else if (can_truncate) { + vm_paddr_t end; + + end = trunc_page(pa + len); + if (end <= pa && totlen != 0) { + /* + * This last segment is only a partial + * page. + */ + len = 0; + break; + } + todo = end - pa; + } + + offset = 0; + len -= todo; + totlen += todo; + + while (todo > 0) { + if (!can_append_paddr(tail, pa)) { + MBUF_EXT_PGS_ASSERT_SANITY(tail); + tail->m_next = extpg_alloc(cb_arg, how); + if (tail->m_next == NULL) + goto error; + tail = tail->m_next; + } + + appended = append_paddr_range(tail, pa, todo); + KASSERT(appended > 0, ("did not append anything")); + + pa += appended; + todo -= appended; + } + } + + MBUF_EXT_PGS_ASSERT_SANITY(tail); + *actual_len = totlen; + return (m); +error: + m_freem(m); + return (NULL); +} + +static struct mbuf * +vmpages_ext_mbuf(memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how, + vm_page_t *ma, size_t offset, size_t len, size_t *actual_len, + bool can_truncate) +{ + struct mbuf *m, *tail; + + while (offset >= PAGE_SIZE) { + ma++; + offset -= PAGE_SIZE; + } + + if (can_truncate) { + size_t end; + + /* + * Trim any partial page at the end, but not if it's + * the only page. + */ + end = trunc_page(offset + len); + if (end > offset) + len = end - offset; + } + *actual_len = len; + + m = tail = extpg_alloc(cb_arg, how); + if (m == NULL) + return (NULL); + + /* First page. */ + m->m_epg_pa[0] = VM_PAGE_TO_PHYS(*ma); + ma++; + m->m_epg_npgs = 1; + m->m_epg_1st_off = offset; + m->m_epg_last_len = PAGE_SIZE - offset; + if (m->m_epg_last_len > len) + m->m_epg_last_len = len; + m->m_len = m->m_epg_last_len; + len -= m->m_epg_last_len; + + /* Full pages. */ + while (len >= PAGE_SIZE) { + if (tail->m_epg_npgs == MBUF_PEXT_MAX_PGS) { + MBUF_EXT_PGS_ASSERT_SANITY(tail); + tail->m_next = extpg_alloc(cb_arg, how); + if (tail->m_next == NULL) + goto error; + tail = tail->m_next; + } + + tail->m_epg_pa[tail->m_epg_npgs] = VM_PAGE_TO_PHYS(*ma); + ma++; + tail->m_epg_npgs++; + tail->m_epg_last_len = PAGE_SIZE; + tail->m_len += PAGE_SIZE; + len -= PAGE_SIZE; + } + + /* Last partial page. */ + if (len > 0) { + if (tail->m_epg_npgs == MBUF_PEXT_MAX_PGS) { + MBUF_EXT_PGS_ASSERT_SANITY(tail); + tail->m_next = extpg_alloc(cb_arg, how); + if (tail->m_next == NULL) + goto error; + tail = tail->m_next; + } + + tail->m_epg_pa[tail->m_epg_npgs] = VM_PAGE_TO_PHYS(*ma); + ma++; + tail->m_epg_npgs++; + tail->m_epg_last_len = len; + tail->m_len += len; + } + + MBUF_EXT_PGS_ASSERT_SANITY(tail); + return (m); +error: + m_freem(m); + return (NULL); +} + +/* + * Somewhat similar to m_copym but optionally avoids a partial mbuf at + * the end. + */ +static struct mbuf * +mbuf_subchain(struct mbuf *m0, size_t offset, size_t len, + size_t *actual_len, bool can_truncate, int how) +{ + struct mbuf *m, *tail; + size_t totlen; + + while (offset >= m0->m_len) { + offset -= m0->m_len; + m0 = m0->m_next; + } + + /* Always return at least one mbuf. */ + totlen = m0->m_len - offset; + if (totlen > len) + totlen = len; + + m = m_get(how, MT_DATA); + if (m == NULL) + return (NULL); + m->m_len = totlen; + if (m0->m_flags & (M_EXT | M_EXTPG)) { + m->m_data = m0->m_data + offset; + mb_dupcl(m, m0); + } else + memcpy(mtod(m, void *), mtodo(m0, offset), m->m_len); + + tail = m; + m0 = m0->m_next; + len -= totlen; + while (len > 0) { + /* + * If truncation is enabled, don't send any partial + * mbufs besides the first one. + */ + if (can_truncate && m0->m_len > len) + break; + + tail->m_next = m_get(how, MT_DATA); + if (tail->m_next == NULL) + goto error; + tail = tail->m_next; + tail->m_len = m0->m_len; + if (m0->m_flags & (M_EXT | M_EXTPG)) { + tail->m_data = m0->m_data; + mb_dupcl(tail, m0); + } else + memcpy(mtod(tail, void *), mtod(m0, void *), + tail->m_len); + + totlen += tail->m_len; + m0 = m0->m_next; + len -= tail->m_len; + } + *actual_len = totlen; + return (m); +error: + m_freem(m); + return (NULL); +} + +struct mbuf * +memdesc_alloc_ext_mbufs(struct memdesc *mem, + memdesc_alloc_ext_mbuf_t *ext_alloc, + memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how, + size_t offset, size_t len, size_t *actual_len, bool can_truncate) +{ + struct mbuf *m; + size_t done; + + switch (mem->md_type) { + case MEMDESC_VADDR: + m = vaddr_ext_mbuf(ext_alloc, cb_arg, how, + (char *)mem->u.md_vaddr + offset, len, &done); + break; + case MEMDESC_PADDR: + m = paddr_ext_mbuf(extpg_alloc, cb_arg, how, mem->u.md_paddr + + offset, len, &done, can_truncate); + break; + case MEMDESC_VLIST: + m = vlist_ext_mbuf(ext_alloc, cb_arg, how, mem->u.md_list, + mem->md_nseg, offset, len, &done); + break; + case MEMDESC_PLIST: + m = plist_ext_mbuf(extpg_alloc, cb_arg, how, mem->u.md_list, + mem->md_nseg, offset, len, &done, can_truncate); + break; + case MEMDESC_UIO: + panic("uio not supported"); + case MEMDESC_MBUF: + m = mbuf_subchain(mem->u.md_mbuf, offset, len, &done, + can_truncate, how); + break; + case MEMDESC_VMPAGES: + m = vmpages_ext_mbuf(extpg_alloc, cb_arg, how, mem->u.md_ma, + mem->md_offset + offset, len, &done, can_truncate); + break; + default: + __assert_unreachable(); + } + if (m == NULL) + return (NULL); + + if (can_truncate) { + KASSERT(done <= len, ("chain too long")); + } else { + KASSERT(done == len, ("short chain with no limit")); + } + KASSERT(m_length(m, NULL) == done, ("length mismatch")); + if (actual_len != NULL) + *actual_len = done; + return (m); +} diff --git a/sys/sys/memdesc.h b/sys/sys/memdesc.h index 24a2c2da6347..f1880fee2cda 100644 --- a/sys/sys/memdesc.h +++ b/sys/sys/memdesc.h @@ -163,4 +163,40 @@ void memdesc_copyback(struct memdesc *mem, int off, int size, const void *src); void memdesc_copydata(struct memdesc *mem, int off, int size, void *dst); +/* + * This routine constructs a chain of M_EXT mbufs backed by a data + * buffer described by a memory descriptor. Some buffers may require + * multiple mbufs. For memory descriptors using unmapped storage + * (e.g. memdesc_vmpages), M_EXTPG mbufs are used. + * + * Since memory descriptors are not an actual buffer, just a + * description of the buffer, the caller is required to supply a + * couple of helper routines to manage allocation of the raw mbufs and + * associate them with a reference to the underlying buffer. + * + * The memdesc_alloc_ext_mbuf_t callback is passed the callback + * argument as its first argument, the how flag as its second + * argument, and the pointer and length of a KVA buffer. This + * callback should allocate an mbuf for the KVA buffer, either by + * making a copy of the data or using m_extaddref(). + * + * The memdesc_alloc_extpg_mbuf_t callback is passed the callback + * argument as its first argument and the how flag as its second + * argument. It should return an empty mbuf allocated by + * mb_alloc_ext_pgs. + * + * If either of the callbacks returns NULL, any partially allocated + * chain is freed and this routine returns NULL. + * + * If can_truncate is true, then this function might return a short + * chain to avoid gratuitously splitting up a page. + */ +typedef struct mbuf *memdesc_alloc_ext_mbuf_t(void *, int, void *, size_t); +typedef struct mbuf *memdesc_alloc_extpg_mbuf_t(void *, int); + +struct mbuf *memdesc_alloc_ext_mbufs(struct memdesc *mem, + memdesc_alloc_ext_mbuf_t *ext_alloc, + memdesc_alloc_extpg_mbuf_t *extpg_alloc, void *cb_arg, int how, + size_t offset, size_t len, size_t *actual_len, bool can_truncate); + #endif /* _SYS_MEMDESC_H_ */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202401091909.409J9orQ099743>