Date: Wed, 3 Sep 2025 15:55:37 GMT
From: Warner Losh <imp@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject: git: dc74f3003c2d - main - nvme: Call vm_fault_hold_pages instead of vmapbuf
Message-ID: <202509031555.583FtbZD058239@gitrepo.freebsd.org>
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=dc74f3003c2d1deea654f24b76a1dd932d428ca0

commit dc74f3003c2d1deea654f24b76a1dd932d428ca0
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2025-09-03 15:06:37 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2025-09-03 15:55:24 +0000

    nvme: Call vm_fault_hold_pages instead of vmapbuf

    Use the underlying mechanism of vmapbuf instead of using this legacy
    interface. This means we don't have to allocate a buf, and can store
    the page array on the stack as it will be small enough for transfers
    that the vast majority of cards can do. And those that can do larger
    (> 512k) have provisions to split up requests.

    Sponsored by:           Netflix
    Reviewed by:            kib, markj
    Differential Revision:  https://reviews.freebsd.org/D52149
---
 sys/dev/nvme/nvme_ctrlr.c | 98 ++++++++++++++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 39 deletions(-)

diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 49960b0f920a..fc912c1342f4 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -41,6 +41,9 @@
 #include <sys/endian.h>
 #include <sys/stdarg.h>
 #include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_map.h>
 
 #include "nvme_private.h"
 #include "nvme_linux.h"
@@ -1265,6 +1268,34 @@ nvme_ctrlr_shared_handler(void *arg)
 	nvme_mmio_write_4(ctrlr, intmc, 1);
 }
 
+#define	NVME_MAX_PAGES	(int)(1024 / sizeof(vm_page_t))
+
+static int
+nvme_user_ioctl_req(vm_offset_t addr, size_t len, bool is_read,
+    vm_page_t *upages, int max_pages, int *npagesp, struct nvme_request **req,
+    nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	vm_prot_t prot = VM_PROT_READ;
+	int err;
+
+	if (is_read)
+		prot |= VM_PROT_WRITE;	/* Device will write to host memory */
+	err = vm_fault_hold_pages(&curproc->p_vmspace->vm_map,
+	    addr, len, prot, upages, max_pages, npagesp);
+	if (err != 0)
+		return (err);
+	*req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
+	(*req)->payload = memdesc_vmpages(upages, len, addr & PAGE_MASK);
+	(*req)->payload_valid = true;
+	return (0);
+}
+
+static void
+nvme_user_ioctl_free(vm_page_t *pages, int npage)
+{
+	vm_page_unhold_pages(pages, npage);
+}
+
 static void
 nvme_pt_done(void *arg, const struct nvme_completion *cpl)
 {
@@ -1287,30 +1318,28 @@ nvme_pt_done(void *arg, const struct nvme_completion *cpl)
 
 int
 nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
-    struct nvme_pt_command *pt, uint32_t nsid, int is_user_buffer,
+    struct nvme_pt_command *pt, uint32_t nsid, int is_user,
     int is_admin_cmd)
 {
-	struct nvme_request	*req;
-	struct mtx		*mtx;
-	struct buf		*buf = NULL;
-	int			ret = 0;
+	struct nvme_request	*req;
+	struct mtx		*mtx;
+	int			ret = 0;
+	int			npages = 0;
+	vm_page_t		upages[NVME_MAX_PAGES];
 
 	if (pt->len > 0) {
 		if (pt->len > ctrlr->max_xfer_size) {
-			nvme_printf(ctrlr, "pt->len (%d) "
-			    "exceeds max_xfer_size (%d)\n", pt->len,
-			    ctrlr->max_xfer_size);
-			return EIO;
+			nvme_printf(ctrlr,
+			    "len (%d) exceeds max_xfer_size (%d)\n",
+			    pt->len, ctrlr->max_xfer_size);
+			return (EIO);
 		}
-		if (is_user_buffer) {
-			buf = uma_zalloc(pbuf_zone, M_WAITOK);
-			buf->b_iocmd = pt->is_read ? BIO_READ : BIO_WRITE;
-			if (vmapbuf(buf, pt->buf, pt->len, 1) < 0) {
-				ret = EFAULT;
-				goto err;
-			}
-			req = nvme_allocate_request_vaddr(buf->b_data, pt->len,
-			    M_WAITOK, nvme_pt_done, pt);
+		if (is_user) {
+			ret = nvme_user_ioctl_req((vm_offset_t)pt->buf, pt->len,
+			    pt->is_read, upages, nitems(upages), &npages, &req,
+			    nvme_pt_done, pt);
+			if (ret != 0)
+				return (ret);
 		} else
 			req = nvme_allocate_request_vaddr(pt->buf, pt->len,
 			    M_WAITOK, nvme_pt_done, pt);
@@ -1344,11 +1373,8 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
 		mtx_sleep(pt, mtx, PRIBIO, "nvme_pt", 0);
 	mtx_unlock(mtx);
 
-	if (buf != NULL) {
-		vunmapbuf(buf);
-err:
-		uma_zfree(pbuf_zone, buf);
-	}
+	if (npages > 0)
+		nvme_user_ioctl_free(upages, npages);
 
 	return (ret);
 }
@@ -1374,8 +1400,9 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
 {
 	struct nvme_request	*req;
 	struct mtx		*mtx;
-	struct buf		*buf = NULL;
 	int			ret = 0;
+	int			npages = 0;
+	vm_page_t		upages[NVME_MAX_PAGES];
 
 	/*
 	 * We don't support metadata.
@@ -1386,7 +1413,7 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
 	if (npc->data_len > 0 && npc->addr != 0) {
 		if (npc->data_len > ctrlr->max_xfer_size) {
 			nvme_printf(ctrlr,
-			    "npc->data_len (%d) exceeds max_xfer_size (%d)\n",
+			    "data_len (%d) exceeds max_xfer_size (%d)\n",
 			    npc->data_len, ctrlr->max_xfer_size);
 			return (EIO);
 		}
@@ -1399,15 +1426,11 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
 		if ((npc->opcode & 0x3) == 3)
 			return (EINVAL);
 		if (is_user) {
-			buf = uma_zalloc(pbuf_zone, M_WAITOK);
-			buf->b_iocmd = npc->opcode & 1 ? BIO_WRITE : BIO_READ;
-			if (vmapbuf(buf, (void *)(uintptr_t)npc->addr,
-			    npc->data_len, 1) < 0) {
-				ret = EFAULT;
-				goto err;
-			}
-			req = nvme_allocate_request_vaddr(buf->b_data,
-			    npc->data_len, M_WAITOK, nvme_npc_done, npc);
+			ret = nvme_user_ioctl_req(npc->addr, npc->data_len,
+			    npc->opcode & 0x1, upages, nitems(upages), &npages,
+			    &req, nvme_npc_done, npc);
+			if (ret != 0)
+				return (ret);
 		} else
 			req = nvme_allocate_request_vaddr(
 			    (void *)(uintptr_t)npc->addr, npc->data_len,
@@ -1442,11 +1465,8 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
 		mtx_sleep(npc, mtx, PRIBIO, "nvme_npc", 0);
 	mtx_unlock(mtx);
 
-	if (buf != NULL) {
-		vunmapbuf(buf);
-err:
-		uma_zfree(pbuf_zone, buf);
-	}
+	if (npages > 0)
+		nvme_user_ioctl_free(upages, npages);
 
 	return (ret);
 }
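For reference, the hold/use/unhold pattern that the new nvme_user_ioctl_req()
and nvme_user_ioctl_free() helpers follow can be sketched in isolation as
below. This is only an illustrative sketch, not code from the commit: the
demo_* names and DEMO_MAX_PAGES are hypothetical, while the
vm_fault_hold_pages() and vm_page_unhold_pages() calls mirror the usage
visible in the diff above.

/*
 * Minimal sketch (not from the commit): hold a user buffer's pages for the
 * duration of an I/O, then release them, without allocating a struct buf or
 * mapping the pages into kernel virtual address space.
 */
#include <sys/param.h>
#include <sys/proc.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>

#define	DEMO_MAX_PAGES	(int)(1024 / sizeof(vm_page_t))	/* small enough for the stack */

static int
demo_hold_user_buffer(vm_offset_t uaddr, size_t len, bool device_writes,
    vm_page_t *pages, int *npagesp)
{
	/*
	 * A "read" from the device lands in host memory, so the pages must
	 * be writable; a write to the device only needs read access.
	 */
	vm_prot_t prot = VM_PROT_READ |
	    (device_writes ? VM_PROT_WRITE : VM_PROT_NONE);

	/*
	 * Fault in and hold the caller's pages; they stay resident until
	 * the holds are dropped.  On success *npagesp is the page count.
	 */
	return (vm_fault_hold_pages(&curproc->p_vmspace->vm_map, uaddr, len,
	    prot, pages, DEMO_MAX_PAGES, npagesp));
}

static void
demo_release_user_buffer(vm_page_t *pages, int npages)
{
	/* Drop the holds taken above once the I/O has completed. */
	if (npages > 0)
		vm_page_unhold_pages(pages, npages);
}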