From owner-svn-src-head@FreeBSD.ORG Sun May 16 11:56:43 2010
Return-Path:
Delivered-To: svn-src-head@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 2F06E1065670;
	Sun, 16 May 2010 11:56:43 +0000 (UTC)
	(envelope-from pjd@FreeBSD.org)
Received: from svn.freebsd.org (unknown [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 1D42A8FC15;
	Sun, 16 May 2010 11:56:43 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o4GBuhWr093124;
	Sun, 16 May 2010 11:56:43 GMT (envelope-from pjd@svn.freebsd.org)
Received: (from pjd@localhost)
	by svn.freebsd.org (8.14.3/8.14.3/Submit) id o4GBug1Y093122;
	Sun, 16 May 2010 11:56:42 GMT (envelope-from pjd@svn.freebsd.org)
Message-Id: <201005161156.o4GBug1Y093122@svn.freebsd.org>
From: Pawel Jakub Dawidek
Date: Sun, 16 May 2010 11:56:42 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
X-SVN-Group: head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc:
Subject: svn commit: r208142 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
X-BeenThere: svn-src-head@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for the src tree for head/-current
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
X-List-Received-Date: Sun, 16 May 2010 11:56:43 -0000

Author: pjd
Date: Sun May 16 11:56:42 2010
New Revision: 208142
URL: http://svn.freebsd.org/changeset/base/208142

Log:
  The whole point of having a dedicated worker thread for each leaf VDEV
  was to avoid calling zio_interrupt() from the geom_up thread context.
  It turns out that when a provider is forcibly removed from the system
  and we kill the worker thread, there can still be some ZIOs pending.
  To complete those pending ZIOs when there is no worker thread anymore,
  we still have to call zio_interrupt() from the geom_up context. To
  avoid this race, just remove the use of worker threads altogether.
  This should be more or less fine: I had thought zio_interrupt() did
  more work, but it only makes a small UMA allocation with M_WAITOK.
  It also saves one context switch per I/O request.
  PR:		kern/145339
  Reported by:	Alex Bakhtin
  MFC after:	1 week

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Sun May 16 11:17:21 2010	(r208141)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c	Sun May 16 11:56:42 2010	(r208142)
@@ -47,31 +47,6 @@ struct g_class zfs_vdev_class = {
 
 DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
 
-typedef struct vdev_geom_ctx {
-	struct g_consumer *gc_consumer;
-	int gc_state;
-	struct bio_queue_head gc_queue;
-	struct mtx gc_queue_mtx;
-} vdev_geom_ctx_t;
-
-static void
-vdev_geom_release(vdev_t *vd)
-{
-	vdev_geom_ctx_t *ctx;
-
-	ctx = vd->vdev_tsd;
-	vd->vdev_tsd = NULL;
-
-	mtx_lock(&ctx->gc_queue_mtx);
-	ctx->gc_state = 1;
-	wakeup_one(&ctx->gc_queue);
-	while (ctx->gc_state != 2)
-		msleep(&ctx->gc_state, &ctx->gc_queue_mtx, 0, "vgeom:w", 0);
-	mtx_unlock(&ctx->gc_queue_mtx);
-	mtx_destroy(&ctx->gc_queue_mtx);
-	kmem_free(ctx, sizeof(*ctx));
-}
-
 static void
 vdev_geom_orphan(struct g_consumer *cp)
 {
@@ -96,8 +71,7 @@ vdev_geom_orphan(struct g_consumer *cp)
 		ZFS_LOG(1, "Destroyed geom %s.", gp->name);
 		g_wither_geom(gp, error);
 	}
-	vdev_geom_release(vd);
-
+	vd->vdev_tsd = NULL;
 	vd->vdev_remove_wanted = B_TRUE;
 	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
 }
@@ -188,52 +162,6 @@ vdev_geom_detach(void *arg, int flag __u
 	}
 }
 
-static void
-vdev_geom_worker(void *arg)
-{
-	vdev_geom_ctx_t *ctx;
-	zio_t *zio;
-	struct bio *bp;
-
-	thread_lock(curthread);
-	sched_prio(curthread, PRIBIO);
-	thread_unlock(curthread);
-
-	ctx = arg;
-	for (;;) {
-		mtx_lock(&ctx->gc_queue_mtx);
-		bp = bioq_takefirst(&ctx->gc_queue);
-		if (bp == NULL) {
-			if (ctx->gc_state == 1) {
-				ctx->gc_state = 2;
-				wakeup_one(&ctx->gc_state);
-				mtx_unlock(&ctx->gc_queue_mtx);
-				kthread_exit();
-			}
-			msleep(&ctx->gc_queue, &ctx->gc_queue_mtx,
-			    PRIBIO | PDROP, "vgeom:io", 0);
-			continue;
-		}
-		mtx_unlock(&ctx->gc_queue_mtx);
-		zio = bp->bio_caller1;
-		zio->io_error = bp->bio_error;
-		if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
-			vdev_t *vd;
-
-			/*
-			 * If we get ENOTSUP, we know that no future
-			 * attempts will ever succeed.  In this case we
-			 * set a persistent bit so that we don't bother
-			 * with the ioctl in the future.
-			 */
-			vd = zio->io_vd;
-			vd->vdev_nowritecache = B_TRUE;
-		}
-		g_destroy_bio(bp);
-		zio_interrupt(zio);
-	}
-}
-
 static uint64_t
 nvlist_get_guid(nvlist_t *list)
 {
@@ -488,7 +416,6 @@ vdev_geom_open_by_path(vdev_t *vd, int c
 static int
 vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
 {
-	vdev_geom_ctx_t *ctx;
 	struct g_provider *pp;
 	struct g_consumer *cp;
 	int error, owned;
@@ -557,19 +484,9 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 	}
 
 	cp->private = vd;
-
-	ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP);
-	bioq_init(&ctx->gc_queue);
-	mtx_init(&ctx->gc_queue_mtx, "zfs:vdev:geom:queue", NULL, MTX_DEF);
-	ctx->gc_consumer = cp;
-	ctx->gc_state = 0;
-
-	vd->vdev_tsd = ctx;
+	vd->vdev_tsd = cp;
 	pp = cp->provider;
 
-	kproc_kthread_add(vdev_geom_worker, ctx, &zfsproc, NULL, 0, 0,
-	    "zfskern", "vdev %s", pp->name);
-
 	/*
 	 * Determine the actual size of the device.
 	 */
@@ -592,50 +509,49 @@ vdev_geom_open(vdev_t *vd, uint64_t *psi
 static void
 vdev_geom_close(vdev_t *vd)
 {
-	vdev_geom_ctx_t *ctx;
 	struct g_consumer *cp;
 
-	if ((ctx = vd->vdev_tsd) == NULL)
-		return;
-	if ((cp = ctx->gc_consumer) == NULL)
+	cp = vd->vdev_tsd;
+	if (cp == NULL)
 		return;
-	vdev_geom_release(vd);
+	vd->vdev_tsd = NULL;
 	g_post_event(vdev_geom_detach, cp, M_WAITOK, NULL);
 }
 
 static void
 vdev_geom_io_intr(struct bio *bp)
 {
-	vdev_geom_ctx_t *ctx;
 	zio_t *zio;
 
 	zio = bp->bio_caller1;
-	ctx = zio->io_vd->vdev_tsd;
-
-	if ((zio->io_error = bp->bio_error) == 0 && bp->bio_resid != 0)
+	zio->io_error = bp->bio_error;
+	if (zio->io_error == 0 && bp->bio_resid != 0)
 		zio->io_error = EIO;
+	if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
+		vdev_t *vd;
 
-	mtx_lock(&ctx->gc_queue_mtx);
-	bioq_insert_tail(&ctx->gc_queue, bp);
-	wakeup_one(&ctx->gc_queue);
-	mtx_unlock(&ctx->gc_queue_mtx);
+		/*
+		 * If we get ENOTSUP, we know that no future
+		 * attempts will ever succeed.  In this case we
+		 * set a persistent bit so that we don't bother
+		 * with the ioctl in the future.
+		 */
+		vd = zio->io_vd;
+		vd->vdev_nowritecache = B_TRUE;
+	}
+	g_destroy_bio(bp);
+	zio_interrupt(zio);
 }
 
 static int
 vdev_geom_io_start(zio_t *zio)
 {
 	vdev_t *vd;
-	vdev_geom_ctx_t *ctx;
 	struct g_consumer *cp;
 	struct bio *bp;
 	int error;
 
-	cp = NULL;
-
 	vd = zio->io_vd;
-	ctx = vd->vdev_tsd;
-	if (ctx != NULL)
-		cp = ctx->gc_consumer;
 
 	if (zio->io_type == ZIO_TYPE_IOCTL) {
 		/* XXPOLICY */
@@ -664,6 +580,7 @@ vdev_geom_io_start(zio_t *zio)
 		return (ZIO_PIPELINE_CONTINUE);
 	}
 sendreq:
+	cp = vd->vdev_tsd;
 	if (cp == NULL) {
 		zio->io_error = ENXIO;
 		return (ZIO_PIPELINE_CONTINUE);
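
For reference, this is roughly how vdev_geom_io_intr() reads once the hunks above
are applied; it is only a condensed view assembled from the context and '+' lines
of that hunk, not additional code in the commit, and the final one-line comment is
added here purely for emphasis. The bio is now completed directly in GEOM's g_up
thread: the error is copied out of the bio, a BIO_FLUSH that fails with ENOTSUP
marks the vdev so the flush ioctl is skipped from then on, and zio_interrupt() is
called with no queue, mutex, or per-vdev worker thread in between.

static void
vdev_geom_io_intr(struct bio *bp)
{
	zio_t *zio;

	zio = bp->bio_caller1;
	zio->io_error = bp->bio_error;
	if (zio->io_error == 0 && bp->bio_resid != 0)
		zio->io_error = EIO;
	if (bp->bio_cmd == BIO_FLUSH && bp->bio_error == ENOTSUP) {
		vdev_t *vd;

		/*
		 * If we get ENOTSUP, we know that no future
		 * attempts will ever succeed.  In this case we
		 * set a persistent bit so that we don't bother
		 * with the ioctl in the future.
		 */
		vd = zio->io_vd;
		vd->vdev_nowritecache = B_TRUE;
	}
	g_destroy_bio(bp);
	/* Completion now happens here, in the g_up thread. */
	zio_interrupt(zio);
}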