Date: Mon, 29 Jun 2009 15:20:13 +0200 From: Attilio Rao <attilio@freebsd.org> To: pluknet <pluknet@gmail.com> Cc: freebsd-stable <freebsd-stable@freebsd.org> Subject: Re: [nfs] process locks in "bo_wwait" on 6.4 Message-ID: <3bbf2fe10906290620r7005f47dtfef8becc7f439515@mail.gmail.com> In-Reply-To: <a31046fc0906290618r31e5bfb0h9e267fadef53408a@mail.gmail.com> References: <a31046fc0906260944h68eed26bs256dcad1f29871ff@mail.gmail.com> <a31046fc0906260951i66c239e1yfac9048d69784209@mail.gmail.com> <a31046fc0906290329p3e9cff1dwa89c8bb88736fed7@mail.gmail.com> <3bbf2fe10906290458v3d57441ar44c4ed8f36c957f@mail.gmail.com> <a31046fc0906290555ib9abf4fy90502eb01e2ef8d3@mail.gmail.com> <3bbf2fe10906290611j683a0ddawbd524e406e832d54@mail.gmail.com> <a31046fc0906290618r31e5bfb0h9e267fadef53408a@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
2009/6/29 pluknet <pluknet@gmail.com>: > 2009/6/29 Attilio Rao <attilio@freebsd.org>: >> 2009/6/29 pluknet <pluknet@gmail.com>: >>> 2009/6/29 Attilio Rao <attilio@freebsd.org>: >>>> 2009/6/29 pluknet <pluknet@gmail.com>: >>>>> 2009/6/26 pluknet <pluknet@gmail.com>: >>>>>> 2009/6/26 pluknet <pluknet@gmail.com>: >>>>>>> Hello. >>>>>>> >>>>>>> While building a module on nfs mounted /usr/src >>>>>>> I got an unkillable process waiting forever in bo_wwait. >>>>>> >>>>>> Small note: iface on NFS server has mtu changed from 1500 to 1450. >>>>>> Can this be a source of the problem? >>>>> >>>>> This is 100% reproducible. Lock in the same place. Any hints? >>>> >>>> Can you also show the output of ps? >>>> A precise map of what processes are doing would be a help. >>>> It would also be useful to print out traces for other threads and not only >>>> the stuck one. >>>> >>> >>> From another run: >> >> I'm unable to see who would be locking the buffer object in question. >> Do you have INVARIANT_SUPPORT/INVARIANTS on? > > Yes, I have both. > >> What revision of /usr/src/sys/kern/vfs_bio.c are you running with? >> > > As of 6.4-R: CVS rev 1.491.2.12.4.1 / SVN rev 183531. Please try this patch and report. 
Thanks, Attilio --- src/sys/nfsclient/nfs_vnops.c 2008/02/13 20:44:18 1.281 +++ src/sys/nfsclient/nfs_vnops.c 2008/03/22 09:15:15 1.282 @@ -33,7 +33,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/nfsclient/nfs_vnops.c,v 1.281 2008/02/13 20:44:18 attilio Exp $"); +__FBSDID("$FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/nfsclient/nfs_vnops.c,v 1.282 2008/03/22 09:15:15 jeff Exp $"); /* * vnode op calls for Sun NFS version 2 and 3 @@ -2736,11 +2736,12 @@ nfs_flush(struct vnode *vp, int waitfor, int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); - int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; + int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; + struct bufobj *bo; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif @@ -2751,6 +2752,7 @@ nfs_flush(struct vnode *vp, int waitfor, slpflag = PCATCH; if (!commit) passone = 0; + bo = &vp->v_bufobj; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server @@ -2763,15 +2765,14 @@ again: endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { - s = splbio(); if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; - VI_LOCK(vp); - TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { + BO_LOCK(bo); + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (!BUF_ISLOCKED(bp) && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) @@ -2788,11 +2789,11 @@ again: * Release the vnode interlock to avoid a lock * order reversal. 
*/ - VI_UNLOCK(vp); + BO_UNLOCK(bo); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); - VI_LOCK(vp); + BO_LOCK(bo); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; @@ -2802,7 +2803,7 @@ again: bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } - TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bvecpos >= bvecsize) break; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { @@ -2815,7 +2816,7 @@ again: nbp = TAILQ_NEXT(bp, b_bobufs); continue; } - VI_UNLOCK(vp); + BO_UNLOCK(bo); bremfree(bp); /* * Work out if all buffers are using the same cred @@ -2834,7 +2835,7 @@ again: wcred = NOCRED; vfs_busy_pages(bp, 1); - VI_LOCK(vp); + BO_LOCK(bo); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to @@ -2858,8 +2859,7 @@ again: if (toff > endoff) endoff = toff; } - splx(s); - VI_UNLOCK(vp); + BO_UNLOCK(bo); } if (bvecpos > 0) { /* @@ -2911,14 +2911,12 @@ again: * specific. We should probably move that * into bundirty(). XXX */ - s = splbio(); - bufobj_wref(&vp->v_bufobj); + bufobj_wref(bo); bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_dirtyoff = bp->b_dirtyend = 0; - splx(s); bufdone(bp); } } @@ -2928,17 +2926,15 @@ again: * Start/do any write(s) that are required. 
*/ loop: - s = splbio(); - VI_LOCK(vp); - TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) { + BO_LOCK(bo); + TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, - VI_MTX(vp), "nfsfsync", slpflag, slptimeo); - splx(s); + BO_MTX(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; @@ -2961,13 +2957,12 @@ loop: BUF_UNLOCK(bp); continue; } - VI_UNLOCK(vp); + BO_UNLOCK(bo); bremfree(bp); if (passone || !commit) bp->b_flags |= B_ASYNC; else bp->b_flags |= B_ASYNC; - splx(s); bwrite(bp); if (nfs_sigintr(nmp, NULL, td)) { error = EINTR; @@ -2975,17 +2970,16 @@ loop: } goto loop; } - splx(s); if (passone) { passone = 0; - VI_UNLOCK(vp); + BO_UNLOCK(bo); goto again; } if (waitfor == MNT_WAIT) { - while (vp->v_bufobj.bo_numoutput) { - error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo); + while (bo->bo_numoutput) { + error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { - VI_UNLOCK(vp); + BO_UNLOCK(bo); error = nfs_sigintr(nmp, NULL, td); if (error) goto done; @@ -2993,17 +2987,17 @@ loop: slpflag = 0; slptimeo = 2 * hz; } - VI_LOCK(vp); + BO_LOCK(bo); } } - if (vp->v_bufobj.bo_dirty.bv_cnt != 0 && commit) { - VI_UNLOCK(vp); + if (bo->bo_dirty.bv_cnt != 0 && commit) { + BO_UNLOCK(bo); goto loop; } /* * Wait for all the async IO requests to drain */ - VI_UNLOCK(vp); + BO_UNLOCK(bo); mtx_lock(&np->n_mtx); while (np->n_directio_asyncwr > 0) { np->n_flag |= NFSYNCWAIT; @@ -3020,14 +3014,14 @@ loop: } mtx_unlock(&np->n_mtx); } else - VI_UNLOCK(vp); + BO_UNLOCK(bo); mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } - if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 && - vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0) + if (commit && bo->bo_dirty.bv_cnt == 0 && + bo->bo_numoutput == 0 && 
np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; mtx_unlock(&np->n_mtx); done: -- Peace can only be achieved by understanding - A. Einstein
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?3bbf2fe10906290620r7005f47dtfef8becc7f439515>