Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 27 Jun 2015 09:44:14 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r284887 - in head/sys: kern sys ufs/ffs
Message-ID:  <201506270944.t5R9iEjO053344@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Sat Jun 27 09:44:14 2015
New Revision: 284887
URL: https://svnweb.freebsd.org/changeset/base/284887

Log:
  Handle errors from background write of the cylinder group blocks.
  
  First, on the write error, bufdone() call from ffs_backgroundwrite()
  panics because pbrelvp() cleared bp->b_bufobj, while brelse() would
  try to re-dirty the copy of the cg buffer.  Handle this by setting
  B_INVAL for the case of BIO_ERROR.
  
  Second, we must re-dirty the real buffer containing the cylinder group
  block data when background write failed.  Real cg buffer was already
  marked clean in ffs_bufwrite(). After the BV_BKGRDINPROG flag is
  cleared on the real cg buffer in ffs_backgroundwrite(), buffer scan
  may reuse the buffer at any moment. The result is lost write, and if
  the write error was only transient, we get corrupted bitmaps.
  
  We cannot re-dirty the original cg buffer in the
  ffs_backgroundwritedone(), since the context is not sleepable,
  preventing us from sleeping for origbp' lock.  Add BV_BKGDERR flag
  (protected by the buffer object lock), which is converted into delayed
  write by brelse(), bqrelse() and buffer scan.
  
  In collaboration with:	Conrad Meyer <cse.cem@gmail.com>
  Reviewed by:	mckusick
  Sponsored by:	The FreeBSD Foundation (kib),
  	  EMC/Isilon storage division (Conrad)
  MFC after:	2 weeks

Modified:
  head/sys/kern/vfs_bio.c
  head/sys/sys/buf.h
  head/sys/ufs/ffs/ffs_vfsops.c

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c	Sat Jun 27 09:01:49 2015	(r284886)
+++ head/sys/kern/vfs_bio.c	Sat Jun 27 09:44:14 2015	(r284887)
@@ -1597,6 +1597,12 @@ brelse(struct buf *bp)
 		return;
 	}
 
+	if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) {
+		BO_LOCK(bp->b_bufobj);
+		bp->b_vflags &= ~BV_BKGRDERR;
+		BO_UNLOCK(bp->b_bufobj);
+		bdirty(bp);
+	}
 	if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
 	    bp->b_error == EIO && !(bp->b_flags & B_INVAL)) {
 		/*
@@ -1853,7 +1859,11 @@ bqrelse(struct buf *bp)
 	}
 
 	/* buffers with stale but valid contents */
-	if (bp->b_flags & B_DELWRI) {
+	if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG |
+	    BV_BKGRDERR)) == BV_BKGRDERR) {
+		BO_LOCK(bp->b_bufobj);
+		bp->b_vflags &= ~BV_BKGRDERR;
+		BO_UNLOCK(bp->b_bufobj);
 		qindex = QUEUE_DIRTY;
 	} else {
 		if ((bp->b_flags & B_DELWRI) == 0 &&
@@ -2372,6 +2382,16 @@ restart:
 			continue;
 		}
 
+		/*
+		 * Requeue the background write buffer with error.
+		 */
+		if ((bp->b_vflags & BV_BKGRDERR) != 0) {
+			bremfreel(bp);
+			mtx_unlock(&bqclean);
+			bqrelse(bp);
+			continue;
+		}
+
 		KASSERT(bp->b_qindex == qindex,
 		    ("getnewbuf: inconsistent queue %d bp %p", qindex, bp));
 

Modified: head/sys/sys/buf.h
==============================================================================
--- head/sys/sys/buf.h	Sat Jun 27 09:01:49 2015	(r284886)
+++ head/sys/sys/buf.h	Sat Jun 27 09:44:14 2015	(r284887)
@@ -253,8 +253,9 @@ struct buf {
 #define	BV_SCANNED	0x00000001	/* VOP_FSYNC funcs mark written bufs */
 #define	BV_BKGRDINPROG	0x00000002	/* Background write in progress */
 #define	BV_BKGRDWAIT	0x00000004	/* Background write waiting */
+#define	BV_BKGRDERR	0x00000008	/* Error from background write */
 
-#define	PRINT_BUF_VFLAGS "\20\3bkgrdwait\2bkgrdinprog\1scanned"
+#define	PRINT_BUF_VFLAGS "\20\4bkgrderr\3bkgrdwait\2bkgrdinprog\1scanned"
 
 #ifdef _KERNEL
 /*

Modified: head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vfsops.c	Sat Jun 27 09:01:49 2015	(r284886)
+++ head/sys/ufs/ffs/ffs_vfsops.c	Sat Jun 27 09:44:14 2015	(r284887)
@@ -1978,12 +1978,19 @@ ffs_backgroundwritedone(struct buf *bp)
 	BO_LOCK(bufobj);
 	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
 		panic("backgroundwritedone: lost buffer");
+
+	/*
+	 * We should mark the cylinder group buffer origbp as
+	 * dirty, to not loose the failed write.
+	 */
+	if ((bp->b_ioflags & BIO_ERROR) != 0)
+		origbp->b_vflags |= BV_BKGRDERR;
 	BO_UNLOCK(bufobj);
 	/*
 	 * Process dependencies then return any unfinished ones.
 	 */
 	pbrelvp(bp);
-	if (!LIST_EMPTY(&bp->b_dep))
+	if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
 		buf_complete(bp);
 #ifdef SOFTUPDATES
 	if (!LIST_EMPTY(&bp->b_dep))
@@ -1995,6 +2002,15 @@ ffs_backgroundwritedone(struct buf *bp)
 	 */
 	bp->b_flags |= B_NOCACHE;
 	bp->b_flags &= ~B_CACHE;
+
+	/*
+	 * Prevent brelse() from trying to keep and re-dirtying bp on
+	 * errors. It causes b_bufobj dereference in
+	 * bdirty()/reassignbuf(), and b_bufobj was cleared in
+	 * pbrelvp() above.
+	 */
+	if ((bp->b_ioflags & BIO_ERROR) != 0)
+		bp->b_flags |= B_INVAL;
 	bufdone(bp);
 	BO_LOCK(bufobj);
 	/*
@@ -2056,6 +2072,8 @@ ffs_bufwrite(struct buf *bp)
 		if (bp->b_vflags & BV_BKGRDINPROG)
 			panic("bufwrite: still writing");
 	}
+	if ((bp->b_vflags & BV_BKGRDERR) != 0)
+		bp->b_vflags &= ~BV_BKGRDERR;
 	BO_UNLOCK(bp->b_bufobj);
 
 	/*



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201506270944.t5R9iEjO053344>