Skip site navigation (1) Skip section navigation (2)
Date:      Sun, 4 Nov 2001 16:06:41 -0800 (PST)
From:      Matthew Dillon <dillon@apollo.backplane.com>
To:        Mark Santcroos <marks@ripe.net>, current@FreeBSD.ORG
Subject:   Re: buf_daemon() lockup
Message-ID:  <200111050006.fA506f309535@apollo.backplane.com>
References:  <20011101092118.A434@laptop.6bone.nl> <200111042259.fA4MxSc93566@apollo.backplane.com>

next in thread | previous in thread | raw e-mail | index | archive | help
    OK, I think I've whacked this one.  Try this patch and see if it
    fixes your buf_daemon() lockups.  The patch also fixes the
    double caching of data that occurs with file-backed MD.

    If this works for you, Mark, I'll commit it and probably also MFC it.
    I'll also be able to apply the same IO_NOWDRAIN fix to the NFS server
    code for loopback mounts.

						-Matt

Index: dev/md/md.c
===================================================================
RCS file: /home/ncvs/src/sys/dev/md/md.c,v
retrieving revision 1.47
diff -u -r1.47 md.c
--- dev/md/md.c	2001/10/11 23:38:13	1.47
+++ dev/md/md.c	2001/11/04 23:54:18
@@ -388,13 +388,18 @@
 		auio.uio_td = curthread;
 		if (VOP_ISLOCKED(sc->vnode, NULL))
 			vprint("unexpected md driver lock", sc->vnode);
+		/*
+		 * When reading, set IO_DIRECT to try to avoid double-caching
+		 * the data.  When writing, IO_DIRECT is not optimal, but we
+		 * must set IO_NOWDRAIN to avoid a wdrain deadlock.
+		 */
 		if (bp->bio_cmd == BIO_READ) {
 			vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
-			error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
+			error = VOP_READ(sc->vnode, &auio, IO_DIRECT, sc->cred);
 		} else {
 			(void) vn_start_write(sc->vnode, &mp, V_WAIT);
 			vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
-			error = VOP_WRITE(sc->vnode, &auio, 0, sc->cred);
+			error = VOP_WRITE(sc->vnode, &auio, IO_NOWDRAIN, sc->cred);
 			vn_finished_write(mp);
 		}
 		VOP_UNLOCK(sc->vnode, 0, curthread);
Index: kern/vfs_bio.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.291
diff -u -r1.291 vfs_bio.c
--- kern/vfs_bio.c	2001/10/21 06:26:55	1.291
+++ kern/vfs_bio.c	2001/11/04 23:41:19
@@ -758,11 +758,15 @@
 		int rtval = bufwait(bp);
 		brelse(bp);
 		return (rtval);
-	} else {
+	} else if ((oldflags & B_NOWDRAIN) == 0) {
 		/*
 		 * don't allow the async write to saturate the I/O
-		 * system.  There is no chance of deadlock here because
-		 * we are blocking on I/O that is already in-progress.
+		 * system.  Deadlocks can occur only if a device strategy
+		 * routine (like in MD) turns around and issues another
+		 * high-level write, in which case B_NOWDRAIN is expected
+		 * to be set.  Otherwise we will not deadlock here because
+		 * we are blocking waiting for I/O that is already in-progress
+		 * to complete.
 		 */
 		waitrunningbufspace();
 	}
@@ -1286,7 +1290,8 @@
 
 	/* unlock */
 	BUF_UNLOCK(bp);
-	bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | B_DIRECT);
+	bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | 
+			B_DIRECT | B_NOWDRAIN);
 	bp->b_ioflags &= ~BIO_ORDERED;
 	if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
 		panic("brelse: not dirty");
Index: sys/buf.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/buf.h,v
retrieving revision 1.121
diff -u -r1.121 buf.h
--- sys/buf.h	2001/09/12 08:38:04	1.121
+++ sys/buf.h	2001/11/04 23:30:25
@@ -192,6 +192,11 @@
  *			the pages underlying the buffer.  B_DIRECT is
  *			sticky until the buffer is released and typically
  *			only has an effect when B_RELBUF is also set.
+ *
+ *	B_NOWDRAIN	This flag should be set when a device (like MD)
+ *			does a turn-around VOP_WRITE from its strategy
+ *			routine.  This flag prevents bwrite() from blocking
+ *			in wdrain, avoiding a deadlock situation.
  */
 
 #define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
@@ -204,7 +209,7 @@
 #define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
 #define	B_DONE		0x00000200	/* I/O completed. */
 #define	B_EINTR		0x00000400	/* I/O was interrupted */
-#define	B_00000800	0x00000800	/* Available flag. */
+#define	B_NOWDRAIN	0x00000800	/* Avoid wdrain deadlock */
 #define	B_SCANNED	0x00001000	/* VOP_FSYNC funcs mark written bufs */
 #define	B_INVAL		0x00002000	/* Does not contain valid info. */
 #define	B_LOCKED	0x00004000	/* Locked in core (not reusable). */
Index: sys/vnode.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/vnode.h,v
retrieving revision 1.162
diff -u -r1.162 vnode.h
--- sys/vnode.h	2001/10/27 19:58:55	1.162
+++ sys/vnode.h	2001/11/04 23:27:40
@@ -222,6 +222,7 @@
 #define	IO_INVAL	0x40		/* invalidate after I/O */
 #define	IO_ASYNC	0x80		/* bawrite rather then bdwrite */
 #define IO_DIRECT	0x100		/* attempt to bypass buffer cache */
+#define IO_NOWDRAIN	0x200		/* do not block on wdrain */
 
 /*
  *  Modes.  Some values same as Ixxx entries from inode.h for now.
Index: ufs/ufs/ufs_readwrite.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ufs/ufs_readwrite.c,v
retrieving revision 1.82
diff -u -r1.82 ufs_readwrite.c
--- ufs/ufs/ufs_readwrite.c	2001/09/12 08:38:10	1.82
+++ ufs/ufs/ufs_readwrite.c	2001/11/04 23:29:15
@@ -511,6 +511,8 @@
 			break;
 		if (ioflag & IO_DIRECT)
 			bp->b_flags |= B_DIRECT;
+		if (ioflag & IO_NOWDRAIN)
+			bp->b_flags |= B_NOWDRAIN;
 
 		if (uio->uio_offset + xfersize > ip->i_size) {
 			ip->i_size = uio->uio_offset + xfersize;

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200111050006.fA506f309535>