Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 14 Apr 2012 05:48:05 +0000 (UTC)
From:      Peter Grehan <grehan@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r234270 - in head/sys/dev/virtio: . balloon block network pci
Message-ID:  <201204140548.q3E5m5Qn015526@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: grehan
Date: Sat Apr 14 05:48:04 2012
New Revision: 234270
URL: http://svn.freebsd.org/changeset/base/234270

Log:
  Catch up with Bryan Venteicher's virtio git repo:
  
  a8af6270bd96be6ccd86f70b60fa6512b710e4f0
        virtio_blk: Include function name in panic string
  
  cbdb03a694b76c5253d7ae3a59b9995b9afbb67a
        virtio_balloon: Do the notify outside of the lock
  
        By the time we return from virtqueue_notify(), the descriptor
        will be in the used ring so we shouldn't have to sleep.
  
  10ba392e60692529a5cbc1e9987e4064e0128447
        virtio: Use DEVMETHOD_END
  
  80cbcc4d6552cac758be67f0c99c36f23ce62110
        virtqueue: Add support for VIRTIO_F_RING_EVENT_IDX
  
        This can be used to reduce the number of guest/host and
        host/guest interrupts by delaying the interrupt until a
        certain index value is reached.
  
        Actual use by the network driver will come along later.
  
  8fc465969acc0c58477153e4c3530390db436c02
        virtqueue: Simplify virtqueue_nused()
  
        Since the values just wrap naturally at UINT16_MAX, we
        can just subtract the two values directly, rather than
        doing 2's complement math.
  
  a8aa22f25959e2767d006cd621b69050e7ffb0ae
        virtio_blk: Remove debugging crud from 75dd732a
  
        There seems to be an issue with Qemu (or FreeBSD VirtIO) that sets
        the PCI register space for the device config to bogus values. This
        only seems to happen after unloading and reloading the module.
  
  d404800661cb2a9769c033f8a50b2133934501aa
        virtio_blk: Use better variable name
  
  75dd732a97743d96e7c63f7ced3c2169696dadd3
        virtio_blk: Partially revert 92ba40e65
  
        Just use the virtqueue to determine if any requests are
        still inflight.
  
  06661ed66b7a9efaea240f99f414c368f1bbcdc7
        virtio_blk: error if allowed too few segments
  
        Should never happen unless the host provides us with a
        bogus seg_max value.
  
  4b33e5085bc87a818433d7e664a0a2c8f56a1a89
        virtio_blk: Sort function declarations
  
  426b9f5cac892c9c64cc7631966461514f7e08c6
        virtio_blk: Cleanup whitespace
  
  617c23e12c61e3c2233d942db713c6b8ff0bd112
        virtio_blk: Call disk_err() on error'd completed requests
  
  081a5712d4b2e0abf273be4d26affcf3870263a9
        virtio_blk: ASSERT the ready and inflight request queues are empty
  
  a9be2631a4f770a84145c18ee03a3f103bed4ca8
        virtio_blk: Simplify check for too many segments
  
        At the cost of a small style violation.
  
  e00ec09da014f2e60cc75542d0ab78898672d521
        virtio_blk: Add beginnings of suspend/resume
  
        Still not sure if we need to virtio_stop()/virtio_reinit()
        the device before/after a suspend.
  
        Don't start additional IO when marked as suspending.
  
  47c71dc6ce8c238aa59ce8afd4bda5aa294bc884
        virtio_blk: Panic when dealt an unhandled BIO cmd
  
  1055544f90fb8c0cc6a2395f5b6104039606aafe
        virtio_blk: Add VQ enqueue/dequeue wrappers
  
        Wrapper functions manage the adding/removing of requests
        to the in-flight list.
  
        Normally biodone() any completed IO when draining the virtqueue.
  
  92ba40e65b3bb5e4acb9300ece711f1ea8f3f7f4
        virtio_blk: Add in-flight list of requests
  
  74f6d260e075443544522c0833dc2712dd93f49b
        virtio_blk: Rename VTBLK_FLAG_DETACHING to VTBLK_FLAG_DETACH
  
  7aa549050f6fc6551c09c6362ed6b2a0728956ef
        virtio_blk: Finish all BIOs through vtblk_finish_bio()
  
        Also properly set bio_resid in the case of errors. Most geom_disk
        providers seem to do the same.
  
  9eef6d0e6f7e5dd362f71ba097f2e2e4c3744882
        Added function to translate VirtIO status to error code
  
  ef06adc337f31e1129d6d5f26de6d8d1be27bcd2
        Reset dumping flag when given unexpected parameters
  
  393b3e390c644193a2e392220dcc6a6c50b212d9
        Added missing VTBLK_LOCK() in dump handler
  
  Obtained from:	Bryan Venteicher  bryanv at daemoninthecloset dot org

Modified:
  head/sys/dev/virtio/balloon/virtio_balloon.c
  head/sys/dev/virtio/block/virtio_blk.c
  head/sys/dev/virtio/network/if_vtnet.c
  head/sys/dev/virtio/pci/virtio_pci.c
  head/sys/dev/virtio/virtio_ring.h
  head/sys/dev/virtio/virtqueue.c
  head/sys/dev/virtio/virtqueue.h

Modified: head/sys/dev/virtio/balloon/virtio_balloon.c
==============================================================================
--- head/sys/dev/virtio/balloon/virtio_balloon.c	Sat Apr 14 04:40:11 2012	(r234269)
+++ head/sys/dev/virtio/balloon/virtio_balloon.c	Sat Apr 14 05:48:04 2012	(r234270)
@@ -122,6 +122,9 @@ static void	vtballoon_add_sysctl(struct 
  */
 #define VTBALLOON_PAGES_PER_REQUEST	256
 
+/* Must be able to fit all page frames in one page (segment). */
+CTASSERT(VTBALLOON_PAGES_PER_REQUEST * sizeof(uint32_t) <= PAGE_SIZE);
+
 #define VTBALLOON_MTX(_sc)		&(_sc)->vtballoon_mtx
 #define VTBALLOON_LOCK_INIT(_sc, _name)	mtx_init(VTBALLOON_MTX((_sc)), _name, \
 					    "VirtIO Balloon Lock", MTX_SPIN)
@@ -138,7 +141,7 @@ static device_method_t vtballoon_methods
 	/* VirtIO methods. */
 	DEVMETHOD(virtio_config_change, vtballoon_config_change),
 
-	{ 0, 0 }
+	DEVMETHOD_END
 };
 
 static driver_t vtballoon_driver = {
@@ -402,13 +405,13 @@ vtballoon_send_page_frames(struct vtball
 
 	error = virtqueue_enqueue(vq, vq, &sg, 1, 0);
 	KASSERT(error == 0, ("error enqueuing page frames to virtqueue"));
+	virtqueue_notify(vq);
 
 	/*
 	 * Inflate and deflate operations are done synchronously. The
 	 * interrupt handler will wake us up.
 	 */
 	VTBALLOON_LOCK(sc);
-	virtqueue_notify(vq);
 
 	while ((c = virtqueue_dequeue(vq, NULL)) == NULL)
 		msleep_spin(sc, VTBALLOON_MTX(sc), "vtbspf", 0);

Modified: head/sys/dev/virtio/block/virtio_blk.c
==============================================================================
--- head/sys/dev/virtio/block/virtio_blk.c	Sat Apr 14 04:40:11 2012	(r234269)
+++ head/sys/dev/virtio/block/virtio_blk.c	Sat Apr 14 05:48:04 2012	(r234270)
@@ -70,8 +70,8 @@ struct vtblk_softc {
 	uint32_t		 vtblk_flags;
 #define VTBLK_FLAG_INDIRECT	0x0001
 #define VTBLK_FLAG_READONLY	0x0002
-#define VTBLK_FLAG_DETACHING	0x0004
-#define VTBLK_FLAG_SUSPENDED	0x0008
+#define VTBLK_FLAG_DETACH	0x0004
+#define VTBLK_FLAG_SUSPEND	0x0008
 #define VTBLK_FLAG_DUMPING	0x0010
 
 	struct virtqueue	*vtblk_vq;
@@ -82,7 +82,7 @@ struct vtblk_softc {
 	TAILQ_HEAD(, vtblk_request)
 				 vtblk_req_free;
 	TAILQ_HEAD(, vtblk_request)
-				 vtblk_req_ready;
+				vtblk_req_ready;
 
 	struct taskqueue	*vtblk_tq;
 	struct task		 vtblk_intr_task;
@@ -116,6 +116,13 @@ static int	vtblk_suspend(device_t);
 static int	vtblk_resume(device_t);
 static int	vtblk_shutdown(device_t);
 
+static int	vtblk_open(struct disk *);
+static int	vtblk_close(struct disk *);
+static int	vtblk_ioctl(struct disk *, u_long, void *, int,
+	            struct thread *);
+static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
+static void	vtblk_strategy(struct bio *);
+
 static void	vtblk_negotiate_features(struct vtblk_softc *);
 static int	vtblk_maximum_segments(struct vtblk_softc *,
 		    struct virtio_blk_config *);
@@ -124,13 +131,7 @@ static void	vtblk_alloc_disk(struct vtbl
 		    struct virtio_blk_config *);
 static void	vtblk_create_disk(struct vtblk_softc *);
 
-static int	vtblk_open(struct disk *);
-static int	vtblk_close(struct disk *);
-static int	vtblk_ioctl(struct disk *, u_long, void *, int,
-		    struct thread *);
-static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
-static void	vtblk_strategy(struct bio *);
-
+static int	vtblk_quiesce(struct vtblk_softc *);
 static void	vtblk_startio(struct vtblk_softc *);
 static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
 static int	vtblk_execute_request(struct vtblk_softc *,
@@ -148,6 +149,7 @@ static int	vtblk_flush_dump(struct vtblk
 static int	vtblk_poll_request(struct vtblk_softc *,
 		    struct vtblk_request *);
 
+static void	vtblk_finish_completed(struct vtblk_softc *);
 static void	vtblk_drain_vq(struct vtblk_softc *, int);
 static void	vtblk_drain(struct vtblk_softc *);
 
@@ -161,7 +163,8 @@ static struct vtblk_request * vtblk_dequ
 static void	vtblk_enqueue_ready(struct vtblk_softc *,
 		    struct vtblk_request *);
 
-static void	vtblk_bio_error(struct bio *, int);
+static int	vtblk_request_error(struct vtblk_request *);
+static void	vtblk_finish_bio(struct bio *, int);
 
 /* Tunables. */
 static int vtblk_no_ident = 0;
@@ -189,9 +192,8 @@ TUNABLE_INT("hw.vtblk.no_ident", &vtblk_
 #define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
 				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
 
-#define VTBLK_BIO_SEGMENTS(_bp)	sglist_count((_bp)->bio_data, (_bp)->bio_bcount)
-
 #define VTBLK_DISK_NAME		"vtbd"
+#define	VTBLK_QUIESCE_TIMEOUT	(30 * hz)
 
 /*
  * Each block request uses at least two segments - one for the header
@@ -210,7 +212,7 @@ static device_method_t vtblk_methods[] =
 	DEVMETHOD(device_resume,	vtblk_resume),
 	DEVMETHOD(device_shutdown,	vtblk_shutdown),
 
-	{ 0, 0 }
+	DEVMETHOD_END
 };
 
 static driver_t vtblk_driver = {
@@ -314,11 +316,13 @@ vtblk_attach(device_t dev)
 	}
 
 	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
+        if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
+		error = EINVAL;
+		device_printf(dev, "fewer than minimum number of segments "
+		    "allowed: %d\n", sc->vtblk_max_nsegs);
+		goto fail;
+	}
 
-	/*
-	 * Allocate working sglist. The number of segments may be too
-	 * large to safely store on the stack.
-	 */
 	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
 	if (sc->vtblk_sglist == NULL) {
 		error = ENOMEM;
@@ -376,7 +380,7 @@ vtblk_detach(device_t dev)
 	sc = device_get_softc(dev);
 
 	VTBLK_LOCK(sc);
-	sc->vtblk_flags |= VTBLK_FLAG_DETACHING;
+	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
 	if (device_is_attached(dev))
 		vtblk_stop(sc);
 	VTBLK_UNLOCK(sc);
@@ -408,15 +412,19 @@ static int
 vtblk_suspend(device_t dev)
 {
 	struct vtblk_softc *sc;
+	int error;
 
 	sc = device_get_softc(dev);
 
 	VTBLK_LOCK(sc);
-	sc->vtblk_flags |= VTBLK_FLAG_SUSPENDED;
-	/* TODO Wait for any inflight IO to complete? */
+	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
+	/* XXX BMV: virtio_stop(), etc needed here? */
+	error = vtblk_quiesce(sc);
+	if (error)
+		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
 	VTBLK_UNLOCK(sc);
 
-	return (0);
+	return (error);
 }
 
 static int
@@ -427,8 +435,9 @@ vtblk_resume(device_t dev)
 	sc = device_get_softc(dev);
 
 	VTBLK_LOCK(sc);
-	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPENDED;
-	/* TODO Resume IO? */
+	/* XXX BMV: virtio_reinit(), etc needed here? */
+	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
+	vtblk_startio(sc);
 	VTBLK_UNLOCK(sc);
 
 	return (0);
@@ -449,7 +458,7 @@ vtblk_open(struct disk *dp)
 	if ((sc = dp->d_drv1) == NULL)
 		return (ENXIO);
 
-	return (sc->vtblk_flags & VTBLK_FLAG_DETACHING ? ENXIO : 0);
+	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
 }
 
 static int
@@ -489,6 +498,8 @@ vtblk_dump(void *arg, void *virtual, vm_
 	if ((sc = dp->d_drv1) == NULL)
 		return (ENXIO);
 
+	VTBLK_LOCK(sc);
+
 	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
 		vtblk_prepare_dump(sc);
 		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
@@ -498,6 +509,10 @@ vtblk_dump(void *arg, void *virtual, vm_
 		error = vtblk_write_dump(sc, virtual, offset, length);
 	else if (virtual == NULL && offset == 0)
 		error = vtblk_flush_dump(sc);
+	else {
+		error = EINVAL;
+		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
+	}
 
 	VTBLK_UNLOCK(sc);
 
@@ -510,7 +525,7 @@ vtblk_strategy(struct bio *bp)
 	struct vtblk_softc *sc;
 
 	if ((sc = bp->bio_disk->d_drv1) == NULL) {
-		vtblk_bio_error(bp, EINVAL);
+		vtblk_finish_bio(bp, EINVAL);
 		return;
 	}
 
@@ -520,29 +535,37 @@ vtblk_strategy(struct bio *bp)
 	 */
 	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
 	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
-		vtblk_bio_error(bp, EROFS);
+		vtblk_finish_bio(bp, EROFS);
 		return;
 	}
 
+#ifdef	INVARIANTS
 	/*
 	 * Prevent read/write buffers spanning too many segments from
 	 * getting into the queue. This should only trip if d_maxsize
 	 * was incorrectly set.
 	 */
 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
-		KASSERT(VTBLK_BIO_SEGMENTS(bp) <= sc->vtblk_max_nsegs -
-		    VTBLK_MIN_SEGMENTS,
+		int nsegs, max_nsegs;
+		
+		nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
+		max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;
+
+		KASSERT(nsegs <= max_nsegs,
 		    ("bio spanned too many segments: %d, max: %d",
-		    VTBLK_BIO_SEGMENTS(bp),
-		    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS));
+		    nsegs, max_nsegs));
 	}
+#endif
 
 	VTBLK_LOCK(sc);
-	if ((sc->vtblk_flags & VTBLK_FLAG_DETACHING) == 0) {
+	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
+		vtblk_finish_bio(bp, ENXIO);
+	else {
 		bioq_disksort(&sc->vtblk_bioq, bp);
-		vtblk_startio(sc);
-	} else
-		vtblk_bio_error(bp, ENXIO);
+
+		if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
+			vtblk_startio(sc);
+	}
 	VTBLK_UNLOCK(sc);
 }
 
@@ -669,6 +692,26 @@ vtblk_create_disk(struct vtblk_softc *sc
 	disk_create(dp, DISK_VERSION);
 }
 
+static int
+vtblk_quiesce(struct vtblk_softc *sc)
+{
+	int error;
+
+	error = 0;
+
+	VTBLK_LOCK_ASSERT(sc);
+
+	while (!virtqueue_empty(sc->vtblk_vq)) {
+		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
+		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
+			error = EBUSY;
+			break;
+		}
+	}
+
+	return (error);
+}
+
 static void
 vtblk_startio(struct vtblk_softc *sc)
 {
@@ -681,9 +724,6 @@ vtblk_startio(struct vtblk_softc *sc)
 
 	VTBLK_LOCK_ASSERT(sc);
 
-	if (sc->vtblk_flags & VTBLK_FLAG_SUSPENDED)
-		return;
-
 	while (!virtqueue_full(vq)) {
 		if ((req = vtblk_dequeue_ready(sc)) == NULL)
 			req = vtblk_bio_request(sc);
@@ -736,9 +776,8 @@ vtblk_bio_request(struct vtblk_softc *sc
 		req->vbr_hdr.sector = bp->bio_offset / 512;
 		break;
 	default:
-		KASSERT(0, ("bio with unhandled cmd: %d", bp->bio_cmd));
-		req->vbr_hdr.type = -1;
-		break;
+		panic("%s: bio with unhandled cmd: %d", __FUNCTION__,
+		    bp->bio_cmd);
 	}
 
 	if (bp->bio_flags & BIO_ORDERED)
@@ -752,7 +791,7 @@ vtblk_execute_request(struct vtblk_softc
 {
 	struct sglist *sg;
 	struct bio *bp;
-	int writable, error;
+	int readable, writable, error;
 
 	sg = sc->vtblk_sglist;
 	bp = req->vbr_bp;
@@ -783,10 +822,9 @@ vtblk_execute_request(struct vtblk_softc
 	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
 	    ("fewer than min segments: %d", sg->sg_nseg));
 
-	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
-	    sg->sg_nseg - writable, writable);
+	readable = sg->sg_nseg - writable;
 
-	return (error);
+	return (virtqueue_enqueue(sc->vtblk_vq, req, sg, readable, writable));
 }
 
 static int
@@ -806,37 +844,23 @@ static void
 vtblk_intr_task(void *arg, int pending)
 {
 	struct vtblk_softc *sc;
-	struct vtblk_request *req;
 	struct virtqueue *vq;
-	struct bio *bp;
 
 	sc = arg;
 	vq = sc->vtblk_vq;
 
 	VTBLK_LOCK(sc);
-	if (sc->vtblk_flags & VTBLK_FLAG_DETACHING) {
+	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
 		VTBLK_UNLOCK(sc);
 		return;
 	}
 
-	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
-		bp = req->vbr_bp;
-
-		if (req->vbr_ack == VIRTIO_BLK_S_OK)
-			bp->bio_resid = 0;
-		else {
-			bp->bio_flags |= BIO_ERROR;
-			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
-				bp->bio_error = ENOTSUP;
-			else
-				bp->bio_error = EIO;
-		}
-
-		biodone(bp);
-		vtblk_enqueue_request(sc, req);
-	}
+	vtblk_finish_completed(sc);
 
-	vtblk_startio(sc);
+	if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
+		vtblk_startio(sc);
+	else
+		wakeup(&sc->vtblk_vq);
 
 	if (virtqueue_enable_intr(vq) != 0) {
 		virtqueue_disable_intr(vq);
@@ -973,7 +997,6 @@ vtblk_poll_request(struct vtblk_softc *s
 {
 	device_t dev;
 	struct virtqueue *vq;
-	struct vtblk_request *r;
 	int error;
 
 	dev = sc->vtblk_dev;
@@ -988,20 +1011,37 @@ vtblk_poll_request(struct vtblk_softc *s
 
 	virtqueue_notify(vq);
 
-	r = virtqueue_poll(vq, NULL);
-	KASSERT(r == req, ("unexpected request response"));
+	req = virtqueue_poll(vq, NULL);
 
-	if (req->vbr_ack != VIRTIO_BLK_S_OK) {
-		error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
-		if (bootverbose)
-			device_printf(dev,
-			    "vtblk_poll_request: IO error: %d\n", error);
+	error = vtblk_request_error(req);
+	if (error && bootverbose) {
+		device_printf(dev, "vtblk_poll_request: IO error: %d\n",
+		    error);
 	}
 
 	return (error);
 }
 
 static void
+vtblk_finish_completed(struct vtblk_softc *sc)
+{
+	struct vtblk_request *req;
+	struct bio *bp;
+	int error;
+
+	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
+		bp = req->vbr_bp;
+
+		error = vtblk_request_error(req);
+		if (error)
+			disk_err(bp, "hard error", -1, 1);
+
+		vtblk_finish_bio(bp, error);
+		vtblk_enqueue_request(sc, req);
+	}
+}
+
+static void
 vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
 {
 	struct virtqueue *vq;
@@ -1013,7 +1053,7 @@ vtblk_drain_vq(struct vtblk_softc *sc, i
 
 	while ((req = virtqueue_drain(vq, &last)) != NULL) {
 		if (!skip_done)
-			vtblk_bio_error(req->vbr_bp, ENXIO);
+			vtblk_finish_bio(req->vbr_bp, ENXIO);
 
 		vtblk_enqueue_request(sc, req);
 	}
@@ -1030,17 +1070,19 @@ vtblk_drain(struct vtblk_softc *sc)
 
 	bioq = &sc->vtblk_bioq;
 
-	if (sc->vtblk_vq != NULL)
+	if (sc->vtblk_vq != NULL) {
+		vtblk_finish_completed(sc);
 		vtblk_drain_vq(sc, 0);
+	}
 
 	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
-		vtblk_bio_error(req->vbr_bp, ENXIO);
+		vtblk_finish_bio(req->vbr_bp, ENXIO);
 		vtblk_enqueue_request(sc, req);
 	}
 
 	while (bioq_first(bioq) != NULL) {
 		bp = bioq_takefirst(bioq);
-		vtblk_bio_error(bp, ENXIO);
+		vtblk_finish_bio(bp, ENXIO);
 	}
 
 	vtblk_free_requests(sc);
@@ -1050,9 +1092,9 @@ static int
 vtblk_alloc_requests(struct vtblk_softc *sc)
 {
 	struct vtblk_request *req;
-	int i, size;
+	int i, nreqs;
 
-	size = virtqueue_size(sc->vtblk_vq);
+	nreqs = virtqueue_size(sc->vtblk_vq);
 
 	/*
 	 * Preallocate sufficient requests to keep the virtqueue full. Each
@@ -1060,9 +1102,9 @@ vtblk_alloc_requests(struct vtblk_softc 
 	 * the number allocated when indirect descriptors are not available.
 	 */
 	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
-		size /= VTBLK_MIN_SEGMENTS;
+		nreqs /= VTBLK_MIN_SEGMENTS;
 
-	for (i = 0; i < size; i++) {
+	for (i = 0; i < nreqs; i++) {
 		req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
 		if (req == NULL)
 			return (ENOMEM);
@@ -1079,6 +1121,9 @@ vtblk_free_requests(struct vtblk_softc *
 {
 	struct vtblk_request *req;
 
+	KASSERT(TAILQ_EMPTY(&sc->vtblk_req_ready),
+	    ("ready requests left on queue"));
+
 	while ((req = vtblk_dequeue_request(sc)) != NULL) {
 		sc->vtblk_request_count--;
 		uma_zfree(vtblk_req_zone, req);
@@ -1126,9 +1171,35 @@ vtblk_enqueue_ready(struct vtblk_softc *
 	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
 }
 
+static int
+vtblk_request_error(struct vtblk_request *req)
+{
+	int error;
+
+	switch (req->vbr_ack) {
+	case VIRTIO_BLK_S_OK:
+		error = 0;
+		break;
+	case VIRTIO_BLK_S_UNSUPP:
+		error = ENOTSUP;
+		break;
+	default:
+		error = EIO;
+		break;
+	}
+
+	return (error);
+}
+
 static void
-vtblk_bio_error(struct bio *bp, int error)
+vtblk_finish_bio(struct bio *bp, int error)
 {
 
-	biofinish(bp, NULL, error);
+	if (error) {
+		bp->bio_resid = bp->bio_bcount;
+		bp->bio_error = error;
+		bp->bio_flags |= BIO_ERROR;
+	}
+
+	biodone(bp);
 }

Modified: head/sys/dev/virtio/network/if_vtnet.c
==============================================================================
--- head/sys/dev/virtio/network/if_vtnet.c	Sat Apr 14 04:40:11 2012	(r234269)
+++ head/sys/dev/virtio/network/if_vtnet.c	Sat Apr 14 05:48:04 2012	(r234270)
@@ -223,7 +223,7 @@ static device_method_t vtnet_methods[] =
 	/* VirtIO methods. */
 	DEVMETHOD(virtio_config_change, vtnet_config_change),
 
-	{ 0, 0 }
+	DEVMETHOD_END
 };
 
 static driver_t vtnet_driver = {

Modified: head/sys/dev/virtio/pci/virtio_pci.c
==============================================================================
--- head/sys/dev/virtio/pci/virtio_pci.c	Sat Apr 14 04:40:11 2012	(r234269)
+++ head/sys/dev/virtio/pci/virtio_pci.c	Sat Apr 14 05:48:04 2012	(r234270)
@@ -189,7 +189,7 @@ static device_method_t vtpci_methods[] =
 	DEVMETHOD(virtio_bus_read_device_config,  vtpci_read_dev_config),
 	DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),
 
-	{ 0, 0 }
+	DEVMETHOD_END
 };
 
 static driver_t vtpci_driver = {

Modified: head/sys/dev/virtio/virtio_ring.h
==============================================================================
--- head/sys/dev/virtio/virtio_ring.h	Sat Apr 14 04:40:11 2012	(r234269)
+++ head/sys/dev/virtio/virtio_ring.h	Sat Apr 14 05:48:04 2012	(r234270)
@@ -103,6 +103,7 @@ struct vring {
  *      __u16 avail_flags;
  *      __u16 avail_idx;
  *      __u16 available[num];
+ *      __u16 used_event_idx;
  *
  *      // Padding to the next align boundary.
  *      char pad[];
@@ -111,11 +112,19 @@ struct vring {
  *      __u16 used_flags;
  *      __u16 used_idx;
  *      struct vring_used_elem used[num];
+ *      __u16 avail_event_idx;
  * };
  *
  * NOTE: for VirtIO PCI, align is 4096.
  */
 
+/*
+ * We publish the used event index at the end of the available ring, and vice
+ * versa. They are at the end for backwards compatibility.
+ */
+#define	vring_used_event(vr)	((vr)->avail->ring[(vr)->num])
+#define	vring_avail_event(vr)	(*(uint16_t *)&(vr)->used->ring[(vr)->num])
+
 static inline int
 vring_size(unsigned int num, unsigned long align)
 {
@@ -140,4 +149,18 @@ vring_init(struct vring *vr, unsigned in
         vr->used = (void *)
 	    (((unsigned long) &vr->avail->ring[num] + align-1) & ~(align-1));
 }
+
+/*
+ * The following is used with VIRTIO_RING_F_EVENT_IDX.
+ *
+ * Assuming a given event_idx value from the other side, if we have
+ * just incremented index from old to new_idx, should we trigger an
+ * event?
+ */
+static inline int
+vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
+{
+
+	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
+}
 #endif /* VIRTIO_RING_H */

Modified: head/sys/dev/virtio/virtqueue.c
==============================================================================
--- head/sys/dev/virtio/virtqueue.c	Sat Apr 14 04:40:11 2012	(r234269)
+++ head/sys/dev/virtio/virtqueue.c	Sat Apr 14 05:48:04 2012	(r234270)
@@ -60,6 +60,7 @@ struct virtqueue {
 	uint16_t		 vq_nentries;
 	uint32_t		 vq_flags;
 #define	VIRTQUEUE_FLAG_INDIRECT	 0x0001
+#define	VIRTQUEUE_FLAG_EVENT_IDX 0x0002
 
 	int			 vq_alignment;
 	int			 vq_ring_size;
@@ -126,7 +127,8 @@ static uint16_t	vq_ring_enqueue_segments
 static int	vq_ring_use_indirect(struct virtqueue *, int);
 static void	vq_ring_enqueue_indirect(struct virtqueue *, void *,
 		    struct sglist *, int, int);
-static void	vq_ring_notify_host(struct virtqueue *, int);
+static int	vq_ring_must_notify_host(struct virtqueue *);
+static void	vq_ring_notify_host(struct virtqueue *);
 static void	vq_ring_free_chain(struct virtqueue *, uint16_t);
 
 uint64_t
@@ -136,6 +138,7 @@ virtqueue_filter_features(uint64_t featu
 
 	mask = (1 << VIRTIO_TRANSPORT_F_START) - 1;
 	mask |= VIRTIO_RING_F_INDIRECT_DESC;
+	mask |= VIRTIO_RING_F_EVENT_IDX;
 
 	return (features & mask);
 }
@@ -184,6 +187,9 @@ virtqueue_alloc(device_t dev, uint16_t q
 	vq->vq_intrhand = info->vqai_intr;
 	vq->vq_intrhand_arg = info->vqai_intr_arg;
 
+	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
+		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;
+
 	if (info->vqai_maxindirsz > 1) {
 		error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
 		if (error)
@@ -384,9 +390,12 @@ virtqueue_full(struct virtqueue *vq)
 void
 virtqueue_notify(struct virtqueue *vq)
 {
+	/* Ensure updated avail->idx is visible to host. */
+	mb();
 
+	if (vq_ring_must_notify_host(vq))
+		vq_ring_notify_host(vq);
 	vq->vq_queued_cnt = 0;
-	vq_ring_notify_host(vq, 0);
 }
 
 int
@@ -395,11 +404,8 @@ virtqueue_nused(struct virtqueue *vq)
 	uint16_t used_idx, nused;
 
 	used_idx = vq->vq_ring.used->idx;
-	if (used_idx >= vq->vq_used_cons_idx)
-		nused = used_idx - vq->vq_used_cons_idx;
-	else
-		nused = UINT16_MAX - vq->vq_used_cons_idx +
-		    used_idx + 1;
+
+	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
 	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");
 
 	return (nused);
@@ -427,6 +433,10 @@ virtqueue_enable_intr(struct virtqueue *
 	 * index of what's already been consumed.
 	 */
 	vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
+		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx;
+       else
+	       vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
 
 	mb();
 
@@ -441,6 +451,37 @@ virtqueue_enable_intr(struct virtqueue *
 	return (0);
 }
 
+int
+virtqueue_postpone_intr(struct virtqueue *vq)
+{
+	uint16_t ndesc;
+
+	/*
+	 * Postpone until at least half of the available descriptors
+	 * have been consumed.
+	 *
+	 * XXX Adaptive factor? (Linux uses 3/4)
+	 */
+	ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2;
+
+	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
+		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
+	else
+		vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+
+	mb();
+
+	/*
+	 * Enough items may have already been consumed to meet our
+	 * threshold since we last checked. Let our caller know so
+	 * it processes the new entries.
+	 */
+	if (virtqueue_nused(vq) > ndesc)
+		return (1);
+
+	return (0);
+}
+
 void
 virtqueue_disable_intr(struct virtqueue *vq)
 {
@@ -448,7 +489,8 @@ virtqueue_disable_intr(struct virtqueue 
 	/*
 	 * Note this is only considered a hint to the host.
 	 */
-	vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+	if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0)
+		vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
 }
 
 int
@@ -618,7 +660,7 @@ vq_ring_update_avail(struct virtqueue *v
 	mb();
 	vq->vq_ring.avail->idx++;
 
-	/* Keep pending count until virtqueue_notify() for debugging. */
+	/* Keep pending count until virtqueue_notify(). */
 	vq->vq_queued_cnt++;
 }
 
@@ -709,15 +751,27 @@ vq_ring_enqueue_indirect(struct virtqueu
 	vq_ring_update_avail(vq, head_idx);
 }
 
-static void
-vq_ring_notify_host(struct virtqueue *vq, int force)
+static int
+vq_ring_must_notify_host(struct virtqueue *vq)
 {
+	uint16_t new_idx, prev_idx, event_idx;
 
-	mb();
+	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
+		new_idx = vq->vq_ring.avail->idx;
+		prev_idx = new_idx - vq->vq_queued_cnt;
+		event_idx = vring_avail_event(&vq->vq_ring);
+
+		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
+	}
+
+	return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0);
+}
+
+static void
+vq_ring_notify_host(struct virtqueue *vq)
+{
 
-	if (force ||
-	    (vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0)
-		VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
+	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
 }
 
 static void

Modified: head/sys/dev/virtio/virtqueue.h
==============================================================================
--- head/sys/dev/virtio/virtqueue.h	Sat Apr 14 04:40:11 2012	(r234269)
+++ head/sys/dev/virtio/virtqueue.h	Sat Apr 14 05:48:04 2012	(r234270)
@@ -78,6 +78,7 @@ int	 virtqueue_reinit(struct virtqueue *
 
 int	 virtqueue_intr(struct virtqueue *vq);
 int	 virtqueue_enable_intr(struct virtqueue *vq);
+int	 virtqueue_postpone_intr(struct virtqueue *vq);
 void	 virtqueue_disable_intr(struct virtqueue *vq);
 
 /* Get physical address of the virtqueue ring. */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204140548.q3E5m5Qn015526>