From owner-freebsd-hackers Wed Jan 16 10:51:36 2002 Delivered-To: freebsd-hackers@freebsd.org Received: from apollo.backplane.com (apollo.backplane.com [216.240.41.2]) by hub.freebsd.org (Postfix) with ESMTP id CE83637B41F; Wed, 16 Jan 2002 10:51:06 -0800 (PST) Received: (from dillon@localhost) by apollo.backplane.com (8.11.6/8.9.1) id g0GIowg68383; Wed, 16 Jan 2002 10:50:58 -0800 (PST) (envelope-from dillon) Date: Wed, 16 Jan 2002 10:50:58 -0800 (PST) From: Matthew Dillon Message-Id: <200201161850.g0GIowg68383@apollo.backplane.com> To: Ian Dowse Cc: Alfred Perlstein , "Alan L. Cox" , FreeBSD-hackers@FreeBSD.ORG, re@FreeBSD.ORG Subject: Re: Need review of NFS patch set for server .. missing/wrong vput() issues References: <200201130239.aa98693@salmon.maths.tcd.ie> Sender: owner-freebsd-hackers@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG : :In message <200201130201.g0D21Xh49451@apollo.backplane.com>, Matthew Dillon wri :tes: :> Ok, cool. I'll get the commit gears started for the :> first part of the patch. : :FYI, I was able to reproduce this and confirm that the first part :of your patch fixes it. All that it takes is for the mknod to fail :because the name already exists, but normally this is masked by the :client because it does an NFSPROC_ACCESS RPC first. : :Another nasty bug in nfsrv_mknod that I just spotted is that it :doesn't override the S_IFMT bits of the file mode supplied by the :client. It should be completely ignoring those bits, and using only :the node-type it has in the `vtyp' variable. I just managed to :create a node that makes ls say "Bad file descriptor" by passing :in a type of NFFIFO and a mode of 0... : :Ian Alexey hasn't had any crashes since applying this part of the NFS mknod patch. I assume it is a go, but the RE's have yet to respond to my MFC request. Two real questions are whether the recent NFS fixes by Ian and the recent softupdates fixes by Kirk should be MFC'd (in addition to my NFS fix). I think Ian's mknod tests are a no-brainer. They should just go in, as should my mknod fix. Some of Kirk's fixes are fairly serious. They include: #1 Fix corruption that can occur if a RW mount is downgraded to RO #2 Fix spl confusion that can occcur in ACQUIRE_LOCK*() softupdates routines #3 Fix softupdates panic that can occur during heavy I/O (see 'drain_output' calls in patch below) I have included Kirk's patch (for stable) below for review. It's a bit messy so I will note that the most important fix is #3 above, and it is a very simple and tiny portion of the below patch. It would be a shame if the simpler NFS & softupdates fixes didn't make it into 4.5. Additionally, I have another two or three patches above and beyond the ones discussed above that can go in after the 4.5 tags are laid down. My tree is starting to get rather messy from the delays :-( -Matt Matthew Dillon Index: ffs_inode.c =================================================================== RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_inode.c,v retrieving revision 1.56.2.4 diff -u -r1.56.2.4 ffs_inode.c --- ffs_inode.c 20 Dec 2001 21:10:52 -0000 1.56.2.4 +++ ffs_inode.c 16 Jan 2002 01:09:40 -0000 @@ -85,9 +85,9 @@ if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) return (0); ip->i_flag &= ~(IN_LAZYMOD | IN_MODIFIED); - if (vp->v_mount->mnt_flag & MNT_RDONLY) - return (0); fs = ip->i_fs; + if (fs->fs_ronly) + return (0); /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. @@ -169,6 +169,8 @@ oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(ovp, 0)); } + if (fs->fs_ronly) + panic("ffs_truncate: read-only filesystem"); #ifdef QUOTA error = getinoquota(oip); if (error) Index: ffs_softdep.c =================================================================== RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_softdep.c,v retrieving revision 1.57.2.9 diff -u -r1.57.2.9 ffs_softdep.c --- ffs_softdep.c 2 Mar 2001 17:22:26 -0000 1.57.2.9 +++ ffs_softdep.c 12 Jan 2002 21:26:01 -0000 @@ -233,8 +233,6 @@ } lk = { 0 }; #define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio() #define FREE_LOCK(lk) splx((lk)->lkt_spl) -#define ACQUIRE_LOCK_INTERLOCKED(lk) -#define FREE_LOCK_INTERLOCKED(lk) #else /* DEBUG */ static struct lockit { @@ -245,13 +243,10 @@ static void acquire_lock __P((struct lockit *)); static void free_lock __P((struct lockit *)); -static void acquire_lock_interlocked __P((struct lockit *)); -static void free_lock_interlocked __P((struct lockit *)); +void softdep_panic __P((char *)); #define ACQUIRE_LOCK(lk) acquire_lock(lk) #define FREE_LOCK(lk) free_lock(lk) -#define ACQUIRE_LOCK_INTERLOCKED(lk) acquire_lock_interlocked(lk) -#define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk) static void acquire_lock(lk) @@ -283,36 +278,78 @@ splx(lk->lkt_spl); } -static void -acquire_lock_interlocked(lk) +/* + * Function to release soft updates lock and panic. + */ +void +softdep_panic(msg) + char *msg; +{ + + if (lk.lkt_held != -1) + FREE_LOCK(&lk); + panic(msg); +} +#endif /* DEBUG */ + +static int interlocked_sleep __P((struct lockit *, int, void *, int, + const char *, int)); + +/* + * When going to sleep, we must save our SPL so that it does + * not get lost if some other process uses the lock while we + * are sleeping. We restore it after we have slept. This routine + * wraps the interlocking with functions that sleep. The list + * below enumerates the available set of operations. + */ +#define UNKNOWN 0 +#define SLEEP 1 +#define LOCKBUF 2 + +static int +interlocked_sleep(lk, op, ident, flags, wmesg, timo) struct lockit *lk; + int op; + void *ident; + int flags; + const char *wmesg; + int timo; { pid_t holder; + int s, retval; + s = lk->lkt_spl; +# ifdef DEBUG + if (lk->lkt_held == -1) + panic("interlocked_sleep: lock not held"); + lk->lkt_held = -1; +# endif /* DEBUG */ + switch (op) { + case SLEEP: + retval = tsleep(ident, flags, wmesg, timo); + break; + case LOCKBUF: + retval = BUF_LOCK((struct buf *)ident, flags); + break; + default: + panic("interlocked_sleep: unknown operation"); + } +# ifdef DEBUG if (lk->lkt_held != -1) { holder = lk->lkt_held; FREE_LOCK(lk); if (holder == CURPROC->p_pid) - panic("softdep_lock_interlocked: locking against self"); + panic("interlocked_sleep: locking against self"); else - panic("softdep_lock_interlocked: lock held by %d", - holder); + panic("interlocked_sleep: lock held by %d", holder); } lk->lkt_held = CURPROC->p_pid; lockcnt++; +# endif /* DEBUG */ + lk->lkt_spl = s; + return (retval); } -static void -free_lock_interlocked(lk) - struct lockit *lk; -{ - - if (lk->lkt_held == -1) - panic("softdep_unlock_interlocked: lock not held"); - lk->lkt_held = -1; -} -#endif /* DEBUG */ - /* * Place holder for real semaphores. */ @@ -348,12 +385,13 @@ { if (semap->value++ > 0) { - if (interlock != NULL) - FREE_LOCK_INTERLOCKED(interlock); - tsleep((caddr_t)semap, semap->prio, semap->name, semap->timo); if (interlock != NULL) { - ACQUIRE_LOCK_INTERLOCKED(interlock); + interlocked_sleep(interlock, SLEEP, (caddr_t)semap, + semap->prio, semap->name, semap->timo); FREE_LOCK(interlock); + } else { + tsleep((caddr_t)semap, semap->prio, semap->name, + semap->timo); } return (0); } @@ -4080,6 +4118,11 @@ */ waitfor = MNT_NOWAIT; top: + /* + * We must wait for any I/O in progress to finish so that + * all potential buffers on the dirty list will be visible. + */ + drain_output(vp, 1); if (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT) == 0) { FREE_LOCK(&lk); return (0); @@ -4174,6 +4217,8 @@ bawrite(bp); return (error); } + if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != 0) + panic("softdep_sync_metadata: flush_pagedep_deps failed"); } break; @@ -4236,15 +4281,8 @@ goto loop; } /* - * We must wait for any I/O in progress to finish so that - * all potential buffers on the dirty list will be visible. - * Once they are all there, proceed with the second pass - * which will wait for the I/O as per above. - */ - drain_output(vp, 1); - /* * The brief unlock is to allow any pent up dependency - * processing to be done. + * processing to be done. Then proceed with the second pass. */ if (waitfor == MNT_NOWAIT) { waitfor = MNT_WAIT; @@ -4257,13 +4295,20 @@ * If we have managed to get rid of all the dirty buffers, * then we are done. For certain directories and block * devices, we may need to do further work. + * + * We must wait for any I/O in progress to finish so that + * all potential buffers on the dirty list will be visible. */ + drain_output(vp, 1); if (TAILQ_FIRST(&vp->v_dirtyblkhd) == NULL) { FREE_LOCK(&lk); return (0); } FREE_LOCK(&lk); + if (!vn_isdisk(vp, NULL) && TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL) + panic("softdep_sync_metadata: flush failed"); + /* * If we are trying to sync a block device, some of its buffers may * contain metadata that cannot be written until the contents of some @@ -4591,9 +4636,8 @@ proc_waiting += 1; if (handle.callout == NULL) handle = timeout(pause_timer, 0, tickdelay > 2 ? tickdelay : 2); - FREE_LOCK_INTERLOCKED(&lk); - (void) tsleep((caddr_t)&proc_waiting, PPAUSE, "softupdate", 0); - ACQUIRE_LOCK_INTERLOCKED(&lk); + interlocked_sleep(&lk, SLEEP, (caddr_t)&proc_waiting, PPAUSE, + "softupdate", 0); proc_waiting -= 1; if (islocked == 0) FREE_LOCK(&lk); @@ -4824,6 +4868,7 @@ int waitfor; { struct buf *bp; + int error; for (;;) { if ((bp = *bpp) == NULL) @@ -4835,17 +4880,18 @@ if (waitfor != MNT_WAIT) return (0); bp->b_xflags |= BX_BKGRDWAIT; - FREE_LOCK_INTERLOCKED(&lk); - tsleep(&bp->b_xflags, PRIBIO, "getbuf", 0); - ACQUIRE_LOCK_INTERLOCKED(&lk); + interlocked_sleep(&lk, SLEEP, &bp->b_xflags, PRIBIO, + "getbuf", 0); continue; } if (waitfor != MNT_WAIT) return (0); - FREE_LOCK_INTERLOCKED(&lk); - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) != ENOLCK) + error = interlocked_sleep(&lk, LOCKBUF, bp, + LK_EXCLUSIVE | LK_SLEEPFAIL, 0, 0); + if (error != ENOLCK) { + FREE_LOCK(&lk); panic("getdirtybuf: inconsistent lock"); - ACQUIRE_LOCK_INTERLOCKED(&lk); + } } if ((bp->b_flags & B_DELWRI) == 0) { BUF_UNLOCK(bp); @@ -4869,9 +4915,8 @@ ACQUIRE_LOCK(&lk); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; - FREE_LOCK_INTERLOCKED(&lk); - tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0); - ACQUIRE_LOCK_INTERLOCKED(&lk); + interlocked_sleep(&lk, SLEEP, (caddr_t)&vp->v_numoutput, + PRIBIO + 1, "drainvp", 0); } if (!islocked) FREE_LOCK(&lk); Index: ffs_vfsops.c =================================================================== RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_vfsops.c,v retrieving revision 1.117.2.7 diff -u -r1.117.2.7 ffs_vfsops.c --- ffs_vfsops.c 25 Dec 2001 01:44:44 -0000 1.117.2.7 +++ ffs_vfsops.c 15 Jan 2002 07:23:36 -0000 @@ -190,6 +190,14 @@ err = 0; ronly = fs->fs_ronly; /* MNT_RELOAD might change this */ if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + /* + * Flush any dirty data. + */ + VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); + /* + * Check for and optionally get rid of files open + * for writing. + */ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; Index: ffs_vnops.c =================================================================== RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_vnops.c,v retrieving revision 1.64 diff -u -r1.64 ffs_vnops.c --- ffs_vnops.c 10 Jan 2000 12:04:25 -0000 1.64 +++ ffs_vnops.c 11 Jan 2002 18:05:14 -0000 @@ -106,6 +106,7 @@ VNODEOP_SET(ffs_fifoop_opv_desc); #include +#define VDRAINED 0x00800 /* * Synch an open file. @@ -248,7 +249,11 @@ splx(s); if ((error = softdep_sync_metadata(ap)) != 0) return (error); + if (!vn_isdisk(vp, NULL) && vp->v_numoutput > 0) + panic("ffs_fsync: fsync failed 6"); s = splbio(); + if (!vn_isdisk(vp, NULL) && vp->v_numoutput > 0) + panic("ffs_fsync: fsync failed 7"); if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { /* @@ -263,12 +268,26 @@ passes -= 1; goto loop; } -#ifdef DIAGNOSTIC - if (!vn_isdisk(vp, NULL)) + if (!vn_isdisk(vp, NULL)) { vprint("ffs_fsync: dirty", vp); -#endif + panic("ffs_fsync: fsync failed 1"); + } } + if (!vn_isdisk(vp, NULL) && vp->v_numoutput > 0) + panic("ffs_fsync: fsync failed 5"); + vp->v_flag |= VDRAINED; } + if (wait && !vn_isdisk(vp, NULL) && + (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd))) + panic("ffs_fsync: fsync failed 4"); splx(s); - return (UFS_UPDATE(vp, wait)); + if (wait && !vn_isdisk(vp, NULL) && + (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd))) + panic("ffs_fsync: fsync failed 2"); + vp->v_flag &=~ VDRAINED; + error = UFS_UPDATE(vp, wait); + if (wait && !vn_isdisk(vp, NULL) && + (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd))) + panic("ffs_fsync: fsync failed 3"); + return (error); } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-hackers" in the body of the message