Date: Sat, 27 Jun 2015 09:44:14 +0000 (UTC) From: Konstantin Belousov <kib@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r284887 - in head/sys: kern sys ufs/ffs Message-ID: <201506270944.t5R9iEjO053344@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib Date: Sat Jun 27 09:44:14 2015 New Revision: 284887 URL: https://svnweb.freebsd.org/changeset/base/284887 Log: Handle errors from background write of the cylinder group blocks. First, on the write error, bufdone() call from ffs_backgroundwrite() panics because pbrelvp() cleared bp->b_bufobj, while brelse() would try to re-dirty the copy of the cg buffer. Handle this by setting B_INVAL for the case of BIO_ERROR. Second, we must re-dirty the real buffer containing the cylinder group block data when background write failed. Real cg buffer was already marked clean in ffs_bufwrite(). After the BV_BKGRDINPROG flag is cleared on the real cg buffer in ffs_backgroundwrite(), buffer scan may reuse the buffer at any moment. The result is lost write, and if the write error was only transient, we get corrupted bitmaps. We cannot re-dirty the original cg buffer in the ffs_backgroundwritedone(), since the context is not sleepable, preventing us from sleeping for origbp' lock. Add BV_BKGDERR flag (protected by the buffer object lock), which is converted into delayed write by brelse(), bqrelse() and buffer scan. In collaboration with: Conrad Meyer <cse.cem@gmail.com> Reviewed by: mckusick Sponsored by: The FreeBSD Foundation (kib), EMC/Isilon storage division (Conrad) MFC after: 2 weeks Modified: head/sys/kern/vfs_bio.c head/sys/sys/buf.h head/sys/ufs/ffs/ffs_vfsops.c Modified: head/sys/kern/vfs_bio.c ============================================================================== --- head/sys/kern/vfs_bio.c Sat Jun 27 09:01:49 2015 (r284886) +++ head/sys/kern/vfs_bio.c Sat Jun 27 09:44:14 2015 (r284887) @@ -1597,6 +1597,12 @@ brelse(struct buf *bp) return; } + if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) { + BO_LOCK(bp->b_bufobj); + bp->b_vflags &= ~BV_BKGRDERR; + BO_UNLOCK(bp->b_bufobj); + bdirty(bp); + } if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) && bp->b_error == EIO && !(bp->b_flags & B_INVAL)) { /* @@ -1853,7 +1859,11 @@ bqrelse(struct buf *bp) } /* buffers with stale but valid contents */ - if (bp->b_flags & B_DELWRI) { + if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG | + BV_BKGRDERR)) == BV_BKGRDERR) { + BO_LOCK(bp->b_bufobj); + bp->b_vflags &= ~BV_BKGRDERR; + BO_UNLOCK(bp->b_bufobj); qindex = QUEUE_DIRTY; } else { if ((bp->b_flags & B_DELWRI) == 0 && @@ -2372,6 +2382,16 @@ restart: continue; } + /* + * Requeue the background write buffer with error. + */ + if ((bp->b_vflags & BV_BKGRDERR) != 0) { + bremfreel(bp); + mtx_unlock(&bqclean); + bqrelse(bp); + continue; + } + KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistent queue %d bp %p", qindex, bp)); Modified: head/sys/sys/buf.h ============================================================================== --- head/sys/sys/buf.h Sat Jun 27 09:01:49 2015 (r284886) +++ head/sys/sys/buf.h Sat Jun 27 09:44:14 2015 (r284887) @@ -253,8 +253,9 @@ struct buf { #define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */ #define BV_BKGRDINPROG 0x00000002 /* Background write in progress */ #define BV_BKGRDWAIT 0x00000004 /* Background write waiting */ +#define BV_BKGRDERR 0x00000008 /* Error from background write */ -#define PRINT_BUF_VFLAGS "\20\3bkgrdwait\2bkgrdinprog\1scanned" +#define PRINT_BUF_VFLAGS "\20\4bkgrderr\3bkgrdwait\2bkgrdinprog\1scanned" #ifdef _KERNEL /* Modified: head/sys/ufs/ffs/ffs_vfsops.c ============================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c Sat Jun 27 09:01:49 2015 (r284886) +++ head/sys/ufs/ffs/ffs_vfsops.c Sat Jun 27 09:44:14 2015 (r284887) @@ -1978,12 +1978,19 @@ ffs_backgroundwritedone(struct buf *bp) BO_LOCK(bufobj); if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL) panic("backgroundwritedone: lost buffer"); + + /* + * We should mark the cylinder group buffer origbp as + * dirty, to not loose the failed write. + */ + if ((bp->b_ioflags & BIO_ERROR) != 0) + origbp->b_vflags |= BV_BKGRDERR; BO_UNLOCK(bufobj); /* * Process dependencies then return any unfinished ones. */ pbrelvp(bp); - if (!LIST_EMPTY(&bp->b_dep)) + if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0) buf_complete(bp); #ifdef SOFTUPDATES if (!LIST_EMPTY(&bp->b_dep)) @@ -1995,6 +2002,15 @@ ffs_backgroundwritedone(struct buf *bp) */ bp->b_flags |= B_NOCACHE; bp->b_flags &= ~B_CACHE; + + /* + * Prevent brelse() from trying to keep and re-dirtying bp on + * errors. It causes b_bufobj dereference in + * bdirty()/reassignbuf(), and b_bufobj was cleared in + * pbrelvp() above. + */ + if ((bp->b_ioflags & BIO_ERROR) != 0) + bp->b_flags |= B_INVAL; bufdone(bp); BO_LOCK(bufobj); /* @@ -2056,6 +2072,8 @@ ffs_bufwrite(struct buf *bp) if (bp->b_vflags & BV_BKGRDINPROG) panic("bufwrite: still writing"); } + if ((bp->b_vflags & BV_BKGRDERR) != 0) + bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); /*
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201506270944.t5R9iEjO053344>