From owner-svn-src-all@FreeBSD.ORG Thu Oct 18 00:40:41 2012 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id E546DF2; Thu, 18 Oct 2012 00:40:41 +0000 (UTC) (envelope-from jimharris@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id CB9E68FC08; Thu, 18 Oct 2012 00:40:41 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q9I0efg1001235; Thu, 18 Oct 2012 00:40:41 GMT (envelope-from jimharris@svn.freebsd.org) Received: (from jimharris@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q9I0efvo001229; Thu, 18 Oct 2012 00:40:41 GMT (envelope-from jimharris@svn.freebsd.org) Message-Id: <201210180040.q9I0efvo001229@svn.freebsd.org> From: Jim Harris Date: Thu, 18 Oct 2012 00:40:41 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r241661 - head/sys/dev/nvme X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 18 Oct 2012 00:40:42 -0000 Author: jimharris Date: Thu Oct 18 00:40:40 2012 New Revision: 241661 URL: http://svn.freebsd.org/changeset/base/241661 Log: Cleanup uio-related code to use struct nvme_request and nvme_ctrlr_submit_io_request(). While here, also fix case where a uio may have more than 1 iovec. NVMe's definition of SGEs (called PRPs) only allows for the first SGE to start on a non-page boundary. The simplest way to handle this is to construct a temporary uio for each iovec, and submit an NVMe request for each. Sponsored by: Intel Modified: head/sys/dev/nvme/nvme.c head/sys/dev/nvme/nvme_ctrlr.c head/sys/dev/nvme/nvme_private.h head/sys/dev/nvme/nvme_qpair.c head/sys/dev/nvme/nvme_uio.c Modified: head/sys/dev/nvme/nvme.c ============================================================================== --- head/sys/dev/nvme/nvme.c Thu Oct 18 00:39:29 2012 (r241660) +++ head/sys/dev/nvme/nvme.c Thu Oct 18 00:40:40 2012 (r241661) @@ -230,15 +230,11 @@ nvme_dump_completion(struct nvme_complet void nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) { - struct nvme_tracker *tr; - struct nvme_qpair *qpair; + struct nvme_tracker *tr = arg; uint32_t cur_nseg; KASSERT(error == 0, ("nvme_payload_map error != 0\n")); - tr = (struct nvme_tracker *)arg; - qpair = tr->qpair; - /* * Note that we specified PAGE_SIZE for alignment and max * segment size when creating the bus dma tags. So here @@ -259,7 +255,7 @@ nvme_payload_map(void *arg, bus_dma_segm } } - nvme_qpair_submit_cmd(qpair, tr); + nvme_qpair_submit_cmd(tr->qpair, tr); } static int Modified: head/sys/dev/nvme/nvme_ctrlr.c ============================================================================== --- head/sys/dev/nvme/nvme_ctrlr.c Thu Oct 18 00:39:29 2012 (r241660) +++ head/sys/dev/nvme/nvme_ctrlr.c Thu Oct 18 00:40:40 2012 (r241661) @@ -833,12 +833,21 @@ nvme_ctrlr_submit_io_request(struct nvme tr->req = req; - if (req->payload_size > 0) { - err = bus_dmamap_load(tr->qpair->dma_tag, tr->payload_dma_map, - req->payload, req->payload_size, - nvme_payload_map, tr, 0); + if (req->uio == NULL) { + if (req->payload_size > 0) { + err = bus_dmamap_load(tr->qpair->dma_tag, + tr->payload_dma_map, req->payload, + req->payload_size, + nvme_payload_map, tr, 0); + if (err != 0) + panic("bus_dmamap_load returned non-zero!\n"); + } else + nvme_qpair_submit_cmd(tr->qpair, tr); + } else { + err = bus_dmamap_load_uio(tr->qpair->dma_tag, + tr->payload_dma_map, req->uio, + nvme_payload_map_uio, tr, 0); if (err != 0) panic("bus_dmamap_load returned non-zero!\n"); - } else - nvme_qpair_submit_cmd(tr->qpair, tr); + } } Modified: head/sys/dev/nvme/nvme_private.h ============================================================================== --- head/sys/dev/nvme/nvme_private.h Thu Oct 18 00:39:29 2012 (r241660) +++ head/sys/dev/nvme/nvme_private.h Thu Oct 18 00:40:40 2012 (r241661) @@ -101,6 +101,7 @@ struct nvme_request { struct nvme_command cmd; void *payload; uint32_t payload_size; + struct uio *uio; nvme_cb_fn_t cb_fn; void *cb_arg; SLIST_ENTRY(nvme_request) slist; @@ -333,6 +334,8 @@ void nvme_ctrlr_cmd_asynchronous_event_r void nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error); +void nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg, + bus_size_t mapsize, int error); int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev); int nvme_ctrlr_reset(struct nvme_controller *ctrlr); @@ -392,6 +395,22 @@ nvme_allocate_request(void *payload, uin return (req); } +static __inline struct nvme_request * +nvme_allocate_request_uio(struct uio *uio, nvme_cb_fn_t cb_fn, void *cb_arg) +{ + struct nvme_request *req; + + req = uma_zalloc(nvme_request_zone, M_NOWAIT | M_ZERO); + if (req == NULL) + return (NULL); + + req->uio = uio; + req->cb_fn = cb_fn; + req->cb_arg = cb_arg; + + return (req); +} + #define nvme_free_request(req) uma_zfree(nvme_request_zone, req) #endif /* __NVME_PRIVATE_H__ */ Modified: head/sys/dev/nvme/nvme_qpair.c ============================================================================== --- head/sys/dev/nvme/nvme_qpair.c Thu Oct 18 00:39:29 2012 (r241660) +++ head/sys/dev/nvme/nvme_qpair.c Thu Oct 18 00:40:40 2012 (r241661) @@ -163,7 +163,7 @@ nvme_qpair_process_completions(struct nv /* nvme_qpair_submit_cmd() will release the lock. */ nvme_qpair_submit_cmd(qpair, tr); else { - if (req->payload_size > 0) + if (req->payload_size > 0 || req->uio != NULL) bus_dmamap_unload(qpair->dma_tag, tr->payload_dma_map); Modified: head/sys/dev/nvme/nvme_uio.c ============================================================================== --- head/sys/dev/nvme/nvme_uio.c Thu Oct 18 00:39:29 2012 (r241660) +++ head/sys/dev/nvme/nvme_uio.c Thu Oct 18 00:40:40 2012 (r241661) @@ -38,8 +38,10 @@ static void nvme_uio_done(void *arg, const struct nvme_completion *status) { struct mtx *mtx; + struct uio *uio = arg; - /* TODO: update uio flags based on status */ + if (status->sf_sc == 0 && status->sf_sct == 0) + uio->uio_resid = 0; mtx = mtx_pool_find(mtxpool_sleep, arg); mtx_lock(mtx); @@ -47,33 +49,17 @@ nvme_uio_done(void *arg, const struct nv mtx_unlock(mtx); } -static struct nvme_tracker * -nvme_allocate_tracker_uio(struct nvme_controller *ctrlr, struct uio *uio, - struct nvme_request *req) -{ - struct nvme_tracker *tr; - struct nvme_qpair *qpair; - - if (ctrlr->per_cpu_io_queues) - qpair = &ctrlr->ioq[curcpu]; - else - qpair = &ctrlr->ioq[0]; - - tr = nvme_qpair_allocate_tracker(qpair); - - if (tr == NULL) - return (NULL); - - tr->qpair = qpair; - tr->req = req; - - return (tr); -} - -static void +void nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg, bus_size_t mapsize, int error) { + struct nvme_tracker *tr = arg; + + /* + * Now that we know the actual size of the uio, divide it by the + * sector size that we stored in cdw12. + */ + tr->req->cmd.cdw12 = (mapsize / tr->req->cmd.cdw12)-1; nvme_payload_map(arg, seg, nseg, error); } @@ -81,20 +67,12 @@ static int nvme_read_uio(struct nvme_namespace *ns, struct uio *uio) { struct nvme_request *req; - struct nvme_tracker *tr; struct nvme_command *cmd; - int err, i; - uint64_t lba, iosize = 0; - - for (i = 0; i < uio->uio_iovcnt; i++) { - iosize += uio->uio_iov[i].iov_len; - } - - req = nvme_allocate_request(NULL, iosize, nvme_uio_done, uio); + uint64_t lba; - tr = nvme_allocate_tracker_uio(ns->ctrlr, uio, req); + req = nvme_allocate_request_uio(uio, nvme_uio_done, uio); - if (tr == NULL) + if (req == NULL) return (ENOMEM); cmd = &req->cmd; @@ -103,13 +81,16 @@ nvme_read_uio(struct nvme_namespace *ns, lba = uio->uio_offset / nvme_ns_get_sector_size(ns); *(uint64_t *)&cmd->cdw10 = lba; + /* + * Store the sector size in cdw12 (where the LBA count normally goes). + * We'll adjust cdw12 in the map_uio callback based on the mapsize + * parameter. This allows us to not have to store the namespace + * in the request simply to get the sector size in the map_uio + * callback. + */ + cmd->cdw12 = nvme_ns_get_sector_size(ns); - cmd->cdw12 = (iosize / nvme_ns_get_sector_size(ns))-1; - - err = bus_dmamap_load_uio(tr->qpair->dma_tag, tr->payload_dma_map, uio, - nvme_payload_map_uio, tr, 0); - - KASSERT(err == 0, ("bus_dmamap_load_uio returned non-zero!\n")); + nvme_ctrlr_submit_io_request(ns->ctrlr, req); return (0); } @@ -118,20 +99,12 @@ static int nvme_write_uio(struct nvme_namespace *ns, struct uio *uio) { struct nvme_request *req; - struct nvme_tracker *tr; struct nvme_command *cmd; - int err, i; - uint64_t lba, iosize = 0; + uint64_t lba; - for (i = 0; i < uio->uio_iovcnt; i++) { - iosize += uio->uio_iov[i].iov_len; - } - - req = nvme_allocate_request(NULL, iosize, nvme_uio_done, uio); - - tr = nvme_allocate_tracker_uio(ns->ctrlr, uio, req); + req = nvme_allocate_request_uio(uio, nvme_uio_done, uio); - if (tr == NULL) + if (req == NULL) return (ENOMEM); cmd = &req->cmd; @@ -140,13 +113,16 @@ nvme_write_uio(struct nvme_namespace *ns lba = uio->uio_offset / nvme_ns_get_sector_size(ns); *(uint64_t *)&cmd->cdw10 = lba; + /* + * Store the sector size in cdw12 (where the LBA count normally goes). + * We'll adjust cdw12 in the map_uio callback based on the mapsize + * parameter. This allows us to not have to store the namespace + * in the request simply to get the sector size in the map_uio + * callback. + */ + cmd->cdw12 = nvme_ns_get_sector_size(ns); - cmd->cdw12 = (iosize / nvme_ns_get_sector_size(ns))-1; - - err = bus_dmamap_load_uio(tr->qpair->dma_tag, tr->payload_dma_map, uio, - nvme_payload_map_uio, tr, 0); - - KASSERT(err == 0, ("bus_dmamap_load_uio returned non-zero!\n")); + nvme_ctrlr_submit_io_request(ns->ctrlr, req); return (0); } @@ -154,9 +130,11 @@ nvme_write_uio(struct nvme_namespace *ns int nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag) { + struct uio uio_tmp; + struct iovec uio_iov_tmp; struct nvme_namespace *ns; struct mtx *mtx; - int err; + int i, nvme_err, physio_err = 0; #if __FreeBSD_version > 900017 int ref; #endif @@ -164,7 +142,7 @@ nvme_ns_physio(struct cdev *dev, struct PHOLD(curproc); ns = dev->si_drv1; - mtx = mtx_pool_find(mtxpool_sleep, uio); + mtx = mtx_pool_find(mtxpool_sleep, &uio_tmp); #if __FreeBSD_version > 900017 dev_refthread(dev, &ref); @@ -172,15 +150,48 @@ nvme_ns_physio(struct cdev *dev, struct dev_refthread(dev); #endif - mtx_lock(mtx); - if (uio->uio_rw == UIO_READ) - err = nvme_read_uio(ns, uio); - else - err = nvme_write_uio(ns, uio); + /* + * NVM Express doesn't really support true SGLs. All SG elements + * must be PAGE_SIZE, except for the first and last element. + * Because of this, we need to break up each iovec into a separate + * NVMe command - otherwise we could end up with sub-PAGE_SIZE + * elements in the middle of an SGL which is not allowed. + */ + uio_tmp.uio_iov = &uio_iov_tmp; + uio_tmp.uio_iovcnt = 1; + uio_tmp.uio_offset = uio->uio_offset; + uio_tmp.uio_segflg = uio->uio_segflg; + uio_tmp.uio_rw = uio->uio_rw; + uio_tmp.uio_td = uio->uio_td; - if (err == 0) - msleep(uio, mtx, PRIBIO, "nvme_physio", 0); - mtx_unlock(mtx); + for (i = 0; i < uio->uio_iovcnt; i++) { + + uio_iov_tmp.iov_base = uio->uio_iov[i].iov_base; + uio_iov_tmp.iov_len = uio->uio_iov[i].iov_len; + uio_tmp.uio_resid = uio_iov_tmp.iov_len; + + mtx_lock(mtx); + + if (uio->uio_rw == UIO_READ) + nvme_err = nvme_read_uio(ns, &uio_tmp); + else + nvme_err = nvme_write_uio(ns, &uio_tmp); + + if (nvme_err == 0) + msleep(&uio_tmp, mtx, PRIBIO, "nvme_physio", 0); + + mtx_unlock(mtx); + + if (uio_tmp.uio_resid == 0) { + uio->uio_resid -= uio_iov_tmp.iov_len; + uio->uio_offset += uio_iov_tmp.iov_len; + } else { + physio_err = EFAULT; + break; + } + + uio_tmp.uio_offset += uio_iov_tmp.iov_len; + } #if __FreeBSD_version > 900017 dev_relthread(dev, ref); @@ -188,9 +199,6 @@ nvme_ns_physio(struct cdev *dev, struct dev_relthread(dev); #endif - if (err == 0) - uio->uio_resid = 0; - PRELE(curproc); - return (0); + return (physio_err); }