From owner-svn-src-all@FreeBSD.ORG Mon May 4 08:16:33 2015 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 61DFB555; Mon, 4 May 2015 08:16:33 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 42DD81D57; Mon, 4 May 2015 08:16:33 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id t448GXn2055390; Mon, 4 May 2015 08:16:33 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id t448GXr0055389; Mon, 4 May 2015 08:16:33 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201505040816.t448GXr0055389@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Mon, 4 May 2015 08:16:33 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r282411 - stable/10/sys/kern X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 04 May 2015 08:16:33 -0000 Author: kib Date: Mon May 4 08:16:32 2015 New Revision: 282411 URL: https://svnweb.freebsd.org/changeset/base/282411 Log: MFC r282085: Partially revert r255986: do not call VOP_FSYNC() when helping bufdaemon in getnewbuf(), do use buf_flush(). The difference is that bufdaemon uses TRYLOCK to get buffer locks, which allows calls to getnewbuf() while another buffer is locked. Modified: stable/10/sys/kern/vfs_bio.c Directory Properties: stable/10/ (props changed) Modified: stable/10/sys/kern/vfs_bio.c ============================================================================== --- stable/10/sys/kern/vfs_bio.c Mon May 4 08:13:05 2015 (r282410) +++ stable/10/sys/kern/vfs_bio.c Mon May 4 08:16:32 2015 (r282411) @@ -113,8 +113,8 @@ static void vfs_setdirty_locked_object(s static void vfs_vmio_release(struct buf *bp); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); -static int buf_flush(int); -static int flushbufqueues(int, int); +static int buf_flush(struct vnode *vp, int); +static int flushbufqueues(struct vnode *, int, int); static void buf_daemon(void); static void bremfreel(struct buf *bp); static __inline void bd_wakeup(void); @@ -2065,7 +2065,7 @@ getnewbuf_bufd_help(struct vnode *vp, in { struct thread *td; char *waitmsg; - int cnt, error, flags, norunbuf, wait; + int error, fl, flags, norunbuf; mtx_assert(&bqclean, MA_OWNED); @@ -2087,8 +2087,6 @@ getnewbuf_bufd_help(struct vnode *vp, in return; td = curthread; - cnt = 0; - wait = MNT_NOWAIT; rw_wlock(&nblock); while ((needsbuffer & flags) != 0) { if (vp != NULL && vp->v_type != VCHR && @@ -2102,20 +2100,23 @@ getnewbuf_bufd_help(struct vnode *vp, in * cannot be achieved by the buf_daemon, that * cannot lock the vnode. */ - if (cnt++ > 2) - wait = MNT_WAIT; - ASSERT_VOP_LOCKED(vp, "bufd_helper"); - error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 : - vn_lock(vp, LK_TRYUPGRADE); - if (error == 0) { - /* play bufdaemon */ - norunbuf = curthread_pflags_set(TDP_BUFNEED | - TDP_NORUNNINGBUF); - VOP_FSYNC(vp, wait, td); - atomic_add_long(¬bufdflushes, 1); - curthread_pflags_restore(norunbuf); - } + norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) | + (td->td_pflags & TDP_NORUNNINGBUF); + + /* + * Play bufdaemon. The getnewbuf() function + * may be called while the thread owns lock + * for another dirty buffer for the same + * vnode, which makes it impossible to use + * VOP_FSYNC() there, due to the buffer lock + * recursion. + */ + td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF; + fl = buf_flush(vp, flushbufqtarget); + td->td_pflags &= norunbuf; rw_wlock(&nblock); + if (fl != 0) + continue; if ((needsbuffer & flags) == 0) break; } @@ -2534,18 +2535,20 @@ static struct kproc_desc buf_kp = { SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp); static int -buf_flush(int target) +buf_flush(struct vnode *vp, int target) { int flushed; - flushed = flushbufqueues(target, 0); + flushed = flushbufqueues(vp, target, 0); if (flushed == 0) { /* * Could not find any buffers without rollback * dependencies, so just write the first one * in the hopes of eventually making progress. */ - flushed = flushbufqueues(target, 1); + if (vp != NULL && target > 2) + target /= 2; + flushbufqueues(vp, target, 1); } return (flushed); } @@ -2582,7 +2585,7 @@ buf_daemon() * the I/O system. */ while (numdirtybuffers > lodirty) { - if (buf_flush(numdirtybuffers - lodirty) == 0) + if (buf_flush(NULL, numdirtybuffers - lodirty) == 0) break; kern_yield(PRI_USER); } @@ -2637,7 +2640,7 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps 0, "Number of buffers flushed with dependecies that require rollbacks"); static int -flushbufqueues(int target, int flushdeps) +flushbufqueues(struct vnode *lvp, int target, int flushdeps) { struct buf *sentinel; struct vnode *vp; @@ -2647,6 +2650,7 @@ flushbufqueues(int target, int flushdeps int flushed; int queue; int error; + bool unlock; flushed = 0; queue = QUEUE_DIRTY; @@ -2668,8 +2672,18 @@ flushbufqueues(int target, int flushdeps mtx_unlock(&bqdirty); break; } - KASSERT(bp->b_qindex != QUEUE_SENTINEL, - ("parallel calls to flushbufqueues() bp %p", bp)); + /* + * Skip sentinels inserted by other invocations of the + * flushbufqueues(), taking care to not reorder them. + * + * Only flush the buffers that belong to the + * vnode locked by the curthread. + */ + if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL && + bp->b_vp != lvp)) { + mtx_unlock(&bqdirty); + continue; + } error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL); mtx_unlock(&bqdirty); if (error != 0) @@ -2717,16 +2731,37 @@ flushbufqueues(int target, int flushdeps BUF_UNLOCK(bp); continue; } - error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT); + if (lvp == NULL) { + unlock = true; + error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT); + } else { + ASSERT_VOP_LOCKED(vp, "getbuf"); + unlock = false; + error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 : + vn_lock(vp, LK_TRYUPGRADE); + } if (error == 0) { CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); - vfs_bio_awrite(bp); + if (curproc == bufdaemonproc) { + vfs_bio_awrite(bp); + } else { + bremfree(bp); + bwrite(bp); + notbufdflushes++; + } vn_finished_write(mp); - VOP_UNLOCK(vp, 0); + if (unlock) + VOP_UNLOCK(vp, 0); flushwithdeps += hasdeps; flushed++; - if (runningbufspace > hirunningspace) + + /* + * Sleeping on runningbufspace while holding + * vnode lock leads to deadlock. + */ + if (curproc == bufdaemonproc && + runningbufspace > hirunningspace) waitrunningbufspace(); continue; }