Date:      Thu, 18 Oct 2012 00:40:41 +0000 (UTC)
From:      Jim Harris <jimharris@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r241661 - head/sys/dev/nvme
Message-ID:  <201210180040.q9I0efvo001229@svn.freebsd.org>

Author: jimharris
Date: Thu Oct 18 00:40:40 2012
New Revision: 241661
URL: http://svn.freebsd.org/changeset/base/241661

Log:
  Clean up uio-related code to use struct nvme_request and
  nvme_ctrlr_submit_io_request().
  
  While here, also fix the case where a uio has more than one iovec.
  NVMe's definition of SGEs (called PRPs) allows only the first SGE to
  start on a non-page boundary.  The simplest way to handle this is to
  construct a temporary uio for each iovec and submit an NVMe request
  for each.
  
  Sponsored by:	Intel
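
  The constraint the log describes can be made concrete: a multi-iovec
  uio fits in a single NVMe command only if no iovec boundary falls in
  the middle of a page.  A minimal sketch of that check, in the spirit
  of the log message -- prp_expressible is a hypothetical helper, not
  part of this commit:

	#include <sys/param.h>
	#include <sys/uio.h>

	/*
	 * Sketch only: returns 1 if an iovec array is expressible as a
	 *  single NVMe PRP list.  Only the first segment may start
	 *  mid-page, and only the last segment may end mid-page.
	 */
	static int
	prp_expressible(const struct iovec *iov, int iovcnt)
	{
		uintptr_t start, end;
		int i;

		for (i = 0; i < iovcnt; i++) {
			start = (uintptr_t)iov[i].iov_base;
			end = start + iov[i].iov_len;
			if (i > 0 && (start & PAGE_MASK) != 0)
				return (0);	/* mid-page start */
			if (i < iovcnt - 1 && (end & PAGE_MASK) != 0)
				return (0);	/* mid-page end */
		}
		return (1);
	}

  Rather than test for this, the loop added to nvme_ns_physio() below
  issues one command per iovec, so the question never arises.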

Modified:
  head/sys/dev/nvme/nvme.c
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_private.h
  head/sys/dev/nvme/nvme_qpair.c
  head/sys/dev/nvme/nvme_uio.c

Modified: head/sys/dev/nvme/nvme.c
==============================================================================
--- head/sys/dev/nvme/nvme.c	Thu Oct 18 00:39:29 2012	(r241660)
+++ head/sys/dev/nvme/nvme.c	Thu Oct 18 00:40:40 2012	(r241661)
@@ -230,15 +230,11 @@ nvme_dump_completion(struct nvme_complet
 void
 nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 {
-	struct nvme_tracker 	*tr;
-	struct nvme_qpair 	*qpair;
+	struct nvme_tracker 	*tr = arg;
 	uint32_t		cur_nseg;
 
 	KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
 
-	tr = (struct nvme_tracker *)arg;
-	qpair = tr->qpair;
-
 	/*
 	 * Note that we specified PAGE_SIZE for alignment and max
 	 *  segment size when creating the bus dma tags.  So here
@@ -259,7 +255,7 @@ nvme_payload_map(void *arg, bus_dma_segm
 		}
 	}
 
-	nvme_qpair_submit_cmd(qpair, tr);
+	nvme_qpair_submit_cmd(tr->qpair, tr);
 }
 
 static int
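
The PRP construction the comment refers to sits in the elided middle of
nvme_payload_map().  For orientation, a sketch of its shape -- the names
prp1, prp2, tr->prp and tr->prp_bus_addr are assumptions for this
sketch, not quoted from the driver:

	/* Sketch, not the committed code. */
	tr->req->cmd.prp1 = seg[0].ds_addr;	/* may start mid-page */
	if (nseg == 2) {
		/* Exactly two entries: prp2 points directly at data. */
		tr->req->cmd.prp2 = seg[1].ds_addr;
	} else if (nseg > 2) {
		/* Three or more entries: prp2 points at a PRP list. */
		for (cur_nseg = 1; cur_nseg < nseg; cur_nseg++)
			tr->prp[cur_nseg - 1] = seg[cur_nseg].ds_addr;
		tr->req->cmd.prp2 = tr->prp_bus_addr;
	}

This layout is legal only because the dma tag guarantees every segment
after the first starts on a page boundary -- exactly the guarantee a
multi-iovec uio can break, as the log message explains.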

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c	Thu Oct 18 00:39:29 2012	(r241660)
+++ head/sys/dev/nvme/nvme_ctrlr.c	Thu Oct 18 00:40:40 2012	(r241661)
@@ -833,12 +833,21 @@ nvme_ctrlr_submit_io_request(struct nvme
 
 	tr->req = req;
 
-	if (req->payload_size > 0) {
-		err = bus_dmamap_load(tr->qpair->dma_tag, tr->payload_dma_map,
-				      req->payload, req->payload_size,
-				      nvme_payload_map, tr, 0);
+	if (req->uio == NULL) {
+		if (req->payload_size > 0) {
+			err = bus_dmamap_load(tr->qpair->dma_tag,
+					      tr->payload_dma_map, req->payload,
+					      req->payload_size,
+					      nvme_payload_map, tr, 0);
+			if (err != 0)
+				panic("bus_dmamap_load returned non-zero!\n");
+		} else
+			nvme_qpair_submit_cmd(tr->qpair, tr);
+	} else {
+		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
+					  tr->payload_dma_map, req->uio,
+					  nvme_payload_map_uio, tr, 0);
 		if (err != 0)
 			panic("bus_dmamap_load returned non-zero!\n");
-	} else
-		nvme_qpair_submit_cmd(tr->qpair, tr);
+	}
 }

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h	Thu Oct 18 00:39:29 2012	(r241660)
+++ head/sys/dev/nvme/nvme_private.h	Thu Oct 18 00:40:40 2012	(r241661)
@@ -101,6 +101,7 @@ struct nvme_request {
 	struct nvme_command		cmd;
 	void				*payload;
 	uint32_t			payload_size;
+	struct uio			*uio;
 	nvme_cb_fn_t			cb_fn;
 	void				*cb_arg;
 	SLIST_ENTRY(nvme_request)	slist;
@@ -333,6 +334,8 @@ void	nvme_ctrlr_cmd_asynchronous_event_r
 
 void	nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg,
 			 int error);
+void	nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg,
+			     bus_size_t mapsize, int error);
 
 int	nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
 int	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
@@ -392,6 +395,22 @@ nvme_allocate_request(void *payload, uin
 	return (req);
 }
 
+static __inline struct nvme_request *
+nvme_allocate_request_uio(struct uio *uio, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+	struct nvme_request *req;
+
+	req = uma_zalloc(nvme_request_zone, M_NOWAIT | M_ZERO);
+	if (req == NULL)
+		return (NULL);
+
+	req->uio = uio;
+	req->cb_fn = cb_fn;
+	req->cb_arg = cb_arg;
+
+	return (req);
+}
+
 #define nvme_free_request(req)	uma_zfree(nvme_request_zone, req)
 
 #endif /* __NVME_PRIVATE_H__ */
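
The extra bus_size_t parameter in the new nvme_payload_map_uio()
prototype is dictated by the bus_dma API: bus_dmamap_load_uio(9)
invokes a bus_dmamap_callback2_t rather than the bus_dmamap_callback_t
used by bus_dmamap_load(9), and the second form additionally reports
the total mapped size.  From <sys/bus_dma.h>, with parameter names
added here for clarity:

	typedef void bus_dmamap_callback_t(void *arg,
	    bus_dma_segment_t *segs, int nseg, int error);
	typedef void bus_dmamap_callback2_t(void *arg,
	    bus_dma_segment_t *segs, int nseg, bus_size_t mapsize,
	    int error);

That mapsize argument is what lets the uio path finish computing cdw12
in the nvme_uio.c hunks below.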

Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c	Thu Oct 18 00:39:29 2012	(r241660)
+++ head/sys/dev/nvme/nvme_qpair.c	Thu Oct 18 00:40:40 2012	(r241661)
@@ -163,7 +163,7 @@ nvme_qpair_process_completions(struct nv
 			/* nvme_qpair_submit_cmd() will release the lock. */
 			nvme_qpair_submit_cmd(qpair, tr);
 		else {
-			if (req->payload_size > 0)
+			if (req->payload_size > 0 || req->uio != NULL)
 				bus_dmamap_unload(qpair->dma_tag,
 				    tr->payload_dma_map);
 

Modified: head/sys/dev/nvme/nvme_uio.c
==============================================================================
--- head/sys/dev/nvme/nvme_uio.c	Thu Oct 18 00:39:29 2012	(r241660)
+++ head/sys/dev/nvme/nvme_uio.c	Thu Oct 18 00:40:40 2012	(r241661)
@@ -38,8 +38,10 @@ static void
 nvme_uio_done(void *arg, const struct nvme_completion *status)
 {
 	struct mtx *mtx;
+	struct uio *uio = arg;
 
-	/* TODO: update uio flags based on status */
+	if (status->sf_sc == 0 && status->sf_sct == 0)
+		uio->uio_resid = 0;
 
 	mtx = mtx_pool_find(mtxpool_sleep, arg);
 	mtx_lock(mtx);
@@ -47,33 +49,17 @@ nvme_uio_done(void *arg, const struct nv
 	mtx_unlock(mtx);
 }
 
-static struct nvme_tracker *
-nvme_allocate_tracker_uio(struct nvme_controller *ctrlr, struct uio *uio,
-    struct nvme_request *req)
-{
-	struct nvme_tracker 	*tr;
-	struct nvme_qpair	*qpair;
-
-	if (ctrlr->per_cpu_io_queues)
-		qpair = &ctrlr->ioq[curcpu];
-	else
-		qpair = &ctrlr->ioq[0];
-
-	tr = nvme_qpair_allocate_tracker(qpair);
-
-	if (tr == NULL)
-		return (NULL);
-
-	tr->qpair = qpair;
-	tr->req = req;
-
-	return (tr);
-}
-
-static void
+void
 nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg,
     bus_size_t mapsize, int error)
 {
+	struct nvme_tracker	*tr = arg;
+
+	/*
+	 * Now that we know the actual size of the uio, divide it by the
+	 *  sector size that we stored in cdw12.
+	 */
+	tr->req->cmd.cdw12 = (mapsize / tr->req->cmd.cdw12)-1;
 	nvme_payload_map(arg, seg, nseg, error);
 }
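
nvme_uio_done() is one half of a pool-mutex rendezvous keyed on the uio
address: nvme_ns_physio() (last hunk below) msleep()s on the same
address, and this callback wakes it.  A minimal sketch of the pattern,
with hypothetical names (chan, submit_io, wait_for_io, io_done):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>
	#include <sys/proc.h>

	static void	submit_io(void *chan);	/* hypothetical */

	/*
	 * Submitter sketch: take the pool mutex before submitting so the
	 *  wakeup cannot fire before msleep() is entered; msleep() drops
	 *  and reacquires the mutex atomically.
	 */
	static void
	wait_for_io(void *chan)
	{
		struct mtx *mtx;

		mtx = mtx_pool_find(mtxpool_sleep, chan);
		mtx_lock(mtx);
		submit_io(chan);
		msleep(chan, mtx, PRIBIO, "waitio", 0);
		mtx_unlock(mtx);
	}

	/* Completion sketch: take the same pool mutex and wake. */
	static void
	io_done(void *chan)
	{
		struct mtx *mtx;

		mtx = mtx_pool_find(mtxpool_sleep, chan);
		mtx_lock(mtx);
		wakeup(chan);
		mtx_unlock(mtx);
	}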
 
@@ -81,20 +67,12 @@ static int
 nvme_read_uio(struct nvme_namespace *ns, struct uio *uio)
 {
 	struct nvme_request	*req;
-	struct nvme_tracker	*tr;
 	struct nvme_command	*cmd;
-	int			err, i;
-	uint64_t		lba, iosize = 0;
-
-	for (i = 0; i < uio->uio_iovcnt; i++) {
-		iosize += uio->uio_iov[i].iov_len;
-	}
-
-	req = nvme_allocate_request(NULL, iosize, nvme_uio_done, uio);
+	uint64_t		lba;
 
-	tr = nvme_allocate_tracker_uio(ns->ctrlr, uio, req);
+	req = nvme_allocate_request_uio(uio, nvme_uio_done, uio);
 
-	if (tr == NULL)
+	if (req == NULL)
 		return (ENOMEM);
 
 	cmd = &req->cmd;
@@ -103,13 +81,16 @@ nvme_read_uio(struct nvme_namespace *ns,
 	lba = uio->uio_offset / nvme_ns_get_sector_size(ns);
 
 	*(uint64_t *)&cmd->cdw10 = lba;
+	/*
+	 * Store the sector size in cdw12 (where the LBA count normally goes).
+	 *  We'll adjust cdw12 in the map_uio callback based on the mapsize
+	 *  parameter.  This allows us to not have to store the namespace
+	 *  in the request simply to get the sector size in the map_uio
+	 *  callback.
+	 */
+	cmd->cdw12 = nvme_ns_get_sector_size(ns);
 
-	cmd->cdw12 = (iosize / nvme_ns_get_sector_size(ns))-1;
-
-	err = bus_dmamap_load_uio(tr->qpair->dma_tag, tr->payload_dma_map, uio,
-	    nvme_payload_map_uio, tr, 0);
-
-	KASSERT(err == 0, ("bus_dmamap_load_uio returned non-zero!\n"));
+	nvme_ctrlr_submit_io_request(ns->ctrlr, req);
 
 	return (0);
 }
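
A worked instance of the cdw10/cdw12 handoff above, with illustrative
numbers: for a namespace with 512-byte sectors and a single-iovec uio
of 8192 bytes at byte offset 65536, this function stores
lba = 65536 / 512 = 128 via cdw10 and the sector size 512 in cdw12;
once the mapping completes, nvme_payload_map_uio() rewrites cdw12 to
8192 / 512 - 1 = 15, the zero-based block count the command expects.
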
@@ -118,20 +99,12 @@ static int
 nvme_write_uio(struct nvme_namespace *ns, struct uio *uio)
 {
 	struct nvme_request	*req;
-	struct nvme_tracker	*tr;
 	struct nvme_command	*cmd;
-	int			err, i;
-	uint64_t		lba, iosize = 0;
+	uint64_t		lba;
 
-	for (i = 0; i < uio->uio_iovcnt; i++) {
-		iosize += uio->uio_iov[i].iov_len;
-	}
-
-	req = nvme_allocate_request(NULL, iosize, nvme_uio_done, uio);
-
-	tr = nvme_allocate_tracker_uio(ns->ctrlr, uio, req);
+	req = nvme_allocate_request_uio(uio, nvme_uio_done, uio);
 
-	if (tr == NULL)
+	if (req == NULL)
 		return (ENOMEM);
 
 	cmd = &req->cmd;
@@ -140,13 +113,16 @@ nvme_write_uio(struct nvme_namespace *ns
 	lba = uio->uio_offset / nvme_ns_get_sector_size(ns);
 
 	*(uint64_t *)&cmd->cdw10 = lba;
+	/*
+	 * Store the sector size in cdw12 (where the LBA count normally goes).
+	 *  We'll adjust cdw12 in the map_uio callback based on the mapsize
+	 *  parameter.  This allows us to not have to store the namespace
+	 *  in the request simply to get the sector size in the map_uio
+	 *  callback.
+	 */
+	cmd->cdw12 = nvme_ns_get_sector_size(ns);
 
-	cmd->cdw12 = (iosize / nvme_ns_get_sector_size(ns))-1;
-
-	err = bus_dmamap_load_uio(tr->qpair->dma_tag, tr->payload_dma_map, uio,
-	    nvme_payload_map_uio, tr, 0);
-
-	KASSERT(err == 0, ("bus_dmamap_load_uio returned non-zero!\n"));
+	nvme_ctrlr_submit_io_request(ns->ctrlr, req);
 
 	return (0);
 }
@@ -154,9 +130,11 @@ nvme_write_uio(struct nvme_namespace *ns
 int
 nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag)
 {
+	struct uio		uio_tmp;
+	struct iovec		uio_iov_tmp;
 	struct nvme_namespace	*ns;
 	struct mtx		*mtx;
-	int			err;
+	int			i, nvme_err, physio_err = 0;
 #if __FreeBSD_version > 900017
 	int			ref;
 #endif
@@ -164,7 +142,7 @@ nvme_ns_physio(struct cdev *dev, struct 
 	PHOLD(curproc);
 
 	ns = dev->si_drv1;
-	mtx = mtx_pool_find(mtxpool_sleep, uio);
+	mtx = mtx_pool_find(mtxpool_sleep, &uio_tmp);
 
 #if __FreeBSD_version > 900017
 	dev_refthread(dev, &ref);
@@ -172,15 +150,48 @@ nvme_ns_physio(struct cdev *dev, struct 
 	dev_refthread(dev);
 #endif
 
-	mtx_lock(mtx);
-	if (uio->uio_rw == UIO_READ)
-		err = nvme_read_uio(ns, uio);
-	else
-		err = nvme_write_uio(ns, uio);
+	/*
+	 * NVM Express doesn't really support true SGLs.  All SG elements
+	 *  must be PAGE_SIZE, except for the first and last element.
+	 *  Because of this, we need to break up each iovec into a separate
+	 *  NVMe command - otherwise we could end up with sub-PAGE_SIZE
+	 *  elements in the middle of an SGL which is not allowed.
+	 */
+	uio_tmp.uio_iov = &uio_iov_tmp;
+	uio_tmp.uio_iovcnt = 1;
+	uio_tmp.uio_offset = uio->uio_offset;
+	uio_tmp.uio_segflg = uio->uio_segflg;
+	uio_tmp.uio_rw = uio->uio_rw;
+	uio_tmp.uio_td = uio->uio_td;
 
-	if (err == 0)
-		msleep(uio, mtx, PRIBIO, "nvme_physio", 0);
-	mtx_unlock(mtx);
+	for (i = 0; i < uio->uio_iovcnt; i++) {
+
+		uio_iov_tmp.iov_base = uio->uio_iov[i].iov_base;
+		uio_iov_tmp.iov_len = uio->uio_iov[i].iov_len;
+		uio_tmp.uio_resid = uio_iov_tmp.iov_len;
+
+		mtx_lock(mtx);
+
+		if (uio->uio_rw == UIO_READ)
+			nvme_err = nvme_read_uio(ns, &uio_tmp);
+		else
+			nvme_err = nvme_write_uio(ns, &uio_tmp);
+
+		if (nvme_err == 0)
+			msleep(&uio_tmp, mtx, PRIBIO, "nvme_physio", 0);
+
+		mtx_unlock(mtx);
+
+		if (uio_tmp.uio_resid == 0) {
+			uio->uio_resid -= uio_iov_tmp.iov_len;
+			uio->uio_offset += uio_iov_tmp.iov_len;
+		} else {
+			physio_err = EFAULT;
+			break;
+		}
+
+		uio_tmp.uio_offset += uio_iov_tmp.iov_len;
+	}
 
 #if __FreeBSD_version > 900017
 	dev_relthread(dev, ref);
@@ -188,9 +199,6 @@ nvme_ns_physio(struct cdev *dev, struct 
 	dev_relthread(dev);
 #endif
 
-	if (err == 0)
-		uio->uio_resid = 0;
-
 	PRELE(curproc);
-	return (0);
+	return (physio_err);
 }
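
To see the per-iovec splitting end to end, consider a hypothetical
userland readv(2) against a namespace device -- the device path, fd
handling and buffer sizes here are illustrative, and a namespace with
512-byte sectors is assumed:

	#include <sys/types.h>
	#include <sys/uio.h>
	#include <fcntl.h>
	#include <unistd.h>

	int
	main(void)
	{
		static char a[4096], b[8192];
		struct iovec iov[2] = {
			{ .iov_base = a, .iov_len = sizeof(a) },
			{ .iov_base = b, .iov_len = sizeof(b) },
		};
		int fd;
		ssize_t n;

		fd = open("/dev/nvme0ns1", O_RDONLY);
		if (fd < 0)
			return (1);
		n = readv(fd, iov, 2);	/* arrives as one uio, two iovecs */
		return (n < 0);
	}

The loop in nvme_ns_physio() turns this single readv() into two NVMe
READ commands: one for 4096 / 512 = 8 blocks (cdw12 = 7) at the
starting LBA, then one for 8192 / 512 = 16 blocks (cdw12 = 15) starting
8 blocks later.  If the first command completes short
(uio_tmp.uio_resid != 0), the loop stops and returns EFAULT without
issuing the second.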


