From owner-freebsd-hackers  Wed Jan 16 10:51:36 2002
Delivered-To: freebsd-hackers@freebsd.org
Received: from apollo.backplane.com (apollo.backplane.com [216.240.41.2])
	by hub.freebsd.org (Postfix) with ESMTP
	id CE83637B41F; Wed, 16 Jan 2002 10:51:06 -0800 (PST)
Received: (from dillon@localhost)
	by apollo.backplane.com (8.11.6/8.9.1) id g0GIowg68383;
	Wed, 16 Jan 2002 10:50:58 -0800 (PST)
	(envelope-from dillon)
Date: Wed, 16 Jan 2002 10:50:58 -0800 (PST)
From: Matthew Dillon <dillon@apollo.backplane.com>
Message-Id: <200201161850.g0GIowg68383@apollo.backplane.com>
To: Ian Dowse <iedowse@maths.tcd.ie>
Cc: Alfred Perlstein <bright@mu.org>, "Alan L. Cox" <alc@imimic.com>,
	FreeBSD-hackers@FreeBSD.ORG, re@FreeBSD.ORG
Subject: Re: Need review of NFS patch set for server .. missing/wrong vput() issues 
References:  <200201130239.aa98693@salmon.maths.tcd.ie>
Sender: owner-freebsd-hackers@FreeBSD.ORG
Precedence: bulk
List-ID: <freebsd-hackers.FreeBSD.ORG>
List-Archive: <http://docs.freebsd.org/mail/> (Web Archive)
List-Help: <mailto:majordomo@FreeBSD.ORG?subject=help> (List Instructions)
List-Subscribe: <mailto:majordomo@FreeBSD.ORG?subject=subscribe%20freebsd-hackers>
List-Unsubscribe: <mailto:majordomo@FreeBSD.ORG?subject=unsubscribe%20freebsd-hackers>
X-Loop: FreeBSD.ORG


:
:In message <200201130201.g0D21Xh49451@apollo.backplane.com>, Matthew Dillon wri
:tes:
:>    Ok, cool.  I'll get the commit gears started for the 
:>    first part of the patch.
:
:FYI, I was able to reproduce this and confirm that the first part
:of your patch fixes it. All that it takes is for the mknod to fail
:because the name already exists, but normally this is masked by the
:client because it does an NFSPROC_ACCESS RPC first.
:
:Another nasty bug in nfsrv_mknod that I just spotted is that it
:doesn't override the S_IFMT bits of the file mode supplied by the
:client. It should be completely ignoring those bits, and using only
:the node-type it has in the `vtyp' variable. I just managed to
:create a node that makes ls say "Bad file descriptor" by passing
:in a type of NFFIFO and a mode of 0...
:
:Ian

    Alexey hasn't had any crashes since applying this part of the NFS mknod
    patch.  I assume it is a go, but the RE's have yet to respond to my MFC
    request.

    Two real questions are whether the recent NFS fixes by Ian and the
    recent softupdates fixes by Kirk should be MFC'd (in addition to my
    NFS fix).  I think Ian's mknod tests are a no-brainer.  They should
    just go in, as should my mknod fix.

    Some of Kirk's fixes are fairly serious.  They include:

    #1 Fix corruption that can occur if a RW mount is downgraded to RO

    #2 Fix spl confusion that can occcur in ACQUIRE_LOCK*() softupdates
      routines 

    #3 Fix softupdates panic that can occur during heavy I/O
      (see 'drain_output' calls in patch below)

    I have included Kirk's patch (for stable) below for review.  It's a bit
    messy so I will note that the most important fix is #3 above, and it is
    a very simple and tiny portion of the below patch.

    It would be a shame if the simpler NFS & softupdates fixes didn't make
    it into 4.5.

    Additionally, I have another two or three patches above and beyond the
    ones discussed above that can go in after the 4.5 tags are laid down.
    My tree is starting to get rather messy from the delays :-(

					-Matt
					Matthew Dillon 
					<dillon@backplane.com>

Index: ffs_inode.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_inode.c,v
retrieving revision 1.56.2.4
diff -u -r1.56.2.4 ffs_inode.c
--- ffs_inode.c	20 Dec 2001 21:10:52 -0000	1.56.2.4
+++ ffs_inode.c	16 Jan 2002 01:09:40 -0000
@@ -85,9 +85,9 @@
 	if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
 		return (0);
 	ip->i_flag &= ~(IN_LAZYMOD | IN_MODIFIED);
-	if (vp->v_mount->mnt_flag & MNT_RDONLY)
-		return (0);
 	fs = ip->i_fs;
+	if (fs->fs_ronly)
+		return (0);
 	/*
 	 * Ensure that uid and gid are correct. This is a temporary
 	 * fix until fsck has been changed to do the update.
@@ -169,6 +169,8 @@
 		oip->i_flag |= IN_CHANGE | IN_UPDATE;
 		return (UFS_UPDATE(ovp, 0));
 	}
+	if (fs->fs_ronly)
+		panic("ffs_truncate: read-only filesystem");
 #ifdef QUOTA
 	error = getinoquota(oip);
 	if (error)
Index: ffs_softdep.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_softdep.c,v
retrieving revision 1.57.2.9
diff -u -r1.57.2.9 ffs_softdep.c
--- ffs_softdep.c	2 Mar 2001 17:22:26 -0000	1.57.2.9
+++ ffs_softdep.c	12 Jan 2002 21:26:01 -0000
@@ -233,8 +233,6 @@
 } lk = { 0 };
 #define ACQUIRE_LOCK(lk)		(lk)->lkt_spl = splbio()
 #define FREE_LOCK(lk)			splx((lk)->lkt_spl)
-#define ACQUIRE_LOCK_INTERLOCKED(lk)
-#define FREE_LOCK_INTERLOCKED(lk)
 
 #else /* DEBUG */
 static struct lockit {
@@ -245,13 +243,10 @@
 
 static	void acquire_lock __P((struct lockit *));
 static	void free_lock __P((struct lockit *));
-static	void acquire_lock_interlocked __P((struct lockit *));
-static	void free_lock_interlocked __P((struct lockit *));
+void	softdep_panic __P((char *));
 
 #define ACQUIRE_LOCK(lk)		acquire_lock(lk)
 #define FREE_LOCK(lk)			free_lock(lk)
-#define ACQUIRE_LOCK_INTERLOCKED(lk)	acquire_lock_interlocked(lk)
-#define FREE_LOCK_INTERLOCKED(lk)	free_lock_interlocked(lk)
 
 static void
 acquire_lock(lk)
@@ -283,36 +278,78 @@
 	splx(lk->lkt_spl);
 }
 
-static void
-acquire_lock_interlocked(lk)
+/*
+ * Function to release soft updates lock and panic.
+ */
+void
+softdep_panic(msg)
+	char *msg;
+{
+
+	if (lk.lkt_held != -1)
+		FREE_LOCK(&lk);
+	panic(msg);
+}
+#endif /* DEBUG */
+
+static	int interlocked_sleep __P((struct lockit *, int, void *, int,
+	    const char *, int));
+
+/*
+ * When going to sleep, we must save our SPL so that it does
+ * not get lost if some other process uses the lock while we
+ * are sleeping. We restore it after we have slept. This routine
+ * wraps the interlocking with functions that sleep. The list
+ * below enumerates the available set of operations.
+ */
+#define	UNKNOWN		0
+#define	SLEEP		1
+#define	LOCKBUF		2
+
+static int
+interlocked_sleep(lk, op, ident, flags, wmesg, timo)
 	struct lockit *lk;
+	int op;
+	void *ident;
+	int flags;
+	const char *wmesg;
+	int timo;
 {
 	pid_t holder;
+	int s, retval;
 
+	s = lk->lkt_spl;
+#	ifdef DEBUG
+	if (lk->lkt_held == -1)
+		panic("interlocked_sleep: lock not held");
+	lk->lkt_held = -1;
+#	endif /* DEBUG */
+	switch (op) {
+	case SLEEP:
+		retval = tsleep(ident, flags, wmesg, timo);
+		break;
+	case LOCKBUF:
+		retval = BUF_LOCK((struct buf *)ident, flags);
+		break;
+	default:
+		panic("interlocked_sleep: unknown operation");
+	}
+#	ifdef DEBUG
 	if (lk->lkt_held != -1) {
 		holder = lk->lkt_held;
 		FREE_LOCK(lk);
 		if (holder == CURPROC->p_pid)
-			panic("softdep_lock_interlocked: locking against self");
+			panic("interlocked_sleep: locking against self");
 		else
-			panic("softdep_lock_interlocked: lock held by %d",
-			    holder);
+			panic("interlocked_sleep: lock held by %d", holder);
 	}
 	lk->lkt_held = CURPROC->p_pid;
 	lockcnt++;
+#	endif /* DEBUG */
+	lk->lkt_spl = s;
+	return (retval);
 }
 
-static void
-free_lock_interlocked(lk)
-	struct lockit *lk;
-{
-
-	if (lk->lkt_held == -1)
-		panic("softdep_unlock_interlocked: lock not held");
-	lk->lkt_held = -1;
-}
-#endif /* DEBUG */
-
 /*
  * Place holder for real semaphores.
  */
@@ -348,12 +385,13 @@
 {
 
 	if (semap->value++ > 0) {
-		if (interlock != NULL)
-			FREE_LOCK_INTERLOCKED(interlock);
-		tsleep((caddr_t)semap, semap->prio, semap->name, semap->timo);
 		if (interlock != NULL) {
-			ACQUIRE_LOCK_INTERLOCKED(interlock);
+			interlocked_sleep(interlock, SLEEP, (caddr_t)semap,
+			    semap->prio, semap->name, semap->timo);
 			FREE_LOCK(interlock);
+		} else {
+			tsleep((caddr_t)semap, semap->prio, semap->name,
+			    semap->timo);
 		}
 		return (0);
 	}
@@ -4080,6 +4118,11 @@
 	 */
 	waitfor = MNT_NOWAIT;
 top:
+	/*
+	 * We must wait for any I/O in progress to finish so that
+	 * all potential buffers on the dirty list will be visible.
+	 */
+	drain_output(vp, 1);
 	if (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT) == 0) {
 		FREE_LOCK(&lk);
 		return (0);
@@ -4174,6 +4217,8 @@
 					bawrite(bp);
 					return (error);
 				}
+				if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != 0)
+					panic("softdep_sync_metadata: flush_pagedep_deps failed");
 			}
 			break;
 
@@ -4236,15 +4281,8 @@
 		goto loop;
 	}
 	/*
-	 * We must wait for any I/O in progress to finish so that
-	 * all potential buffers on the dirty list will be visible.
-	 * Once they are all there, proceed with the second pass
-	 * which will wait for the I/O as per above.
-	 */
-	drain_output(vp, 1);
-	/*
 	 * The brief unlock is to allow any pent up dependency
-	 * processing to be done.
+	 * processing to be done. Then proceed with the second pass.
 	 */
 	if (waitfor == MNT_NOWAIT) {
 		waitfor = MNT_WAIT;
@@ -4257,13 +4295,20 @@
 	 * If we have managed to get rid of all the dirty buffers,
 	 * then we are done. For certain directories and block
 	 * devices, we may need to do further work.
+	 *
+	 * We must wait for any I/O in progress to finish so that
+	 * all potential buffers on the dirty list will be visible.
 	 */
+	drain_output(vp, 1);
 	if (TAILQ_FIRST(&vp->v_dirtyblkhd) == NULL) {
 		FREE_LOCK(&lk);
 		return (0);
 	}
 
 	FREE_LOCK(&lk);
+	if (!vn_isdisk(vp, NULL) && TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
+		panic("softdep_sync_metadata: flush failed");
+
 	/*
 	 * If we are trying to sync a block device, some of its buffers may
 	 * contain metadata that cannot be written until the contents of some
@@ -4591,9 +4636,8 @@
 	proc_waiting += 1;
 	if (handle.callout == NULL)
 		handle = timeout(pause_timer, 0, tickdelay > 2 ? tickdelay : 2);
-	FREE_LOCK_INTERLOCKED(&lk);
-	(void) tsleep((caddr_t)&proc_waiting, PPAUSE, "softupdate", 0);
-	ACQUIRE_LOCK_INTERLOCKED(&lk);
+	interlocked_sleep(&lk, SLEEP, (caddr_t)&proc_waiting, PPAUSE,
+	    "softupdate", 0);
 	proc_waiting -= 1;
 	if (islocked == 0)
 		FREE_LOCK(&lk);
@@ -4824,6 +4868,7 @@
 	int waitfor;
 {
 	struct buf *bp;
+	int error;
 
 	for (;;) {
 		if ((bp = *bpp) == NULL)
@@ -4835,17 +4880,18 @@
 			if (waitfor != MNT_WAIT)
 				return (0);
 			bp->b_xflags |= BX_BKGRDWAIT;
-			FREE_LOCK_INTERLOCKED(&lk);
-			tsleep(&bp->b_xflags, PRIBIO, "getbuf", 0);
-			ACQUIRE_LOCK_INTERLOCKED(&lk);
+			interlocked_sleep(&lk, SLEEP, &bp->b_xflags, PRIBIO,
+			    "getbuf", 0);
 			continue;
 		}
 		if (waitfor != MNT_WAIT)
 			return (0);
-		FREE_LOCK_INTERLOCKED(&lk);
-		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) != ENOLCK)
+		error = interlocked_sleep(&lk, LOCKBUF, bp,
+		    LK_EXCLUSIVE | LK_SLEEPFAIL, 0, 0);
+		if (error != ENOLCK) {
+			FREE_LOCK(&lk);
 			panic("getdirtybuf: inconsistent lock");
-		ACQUIRE_LOCK_INTERLOCKED(&lk);
+		}
 	}
 	if ((bp->b_flags & B_DELWRI) == 0) {
 		BUF_UNLOCK(bp);
@@ -4869,9 +4915,8 @@
 		ACQUIRE_LOCK(&lk);
 	while (vp->v_numoutput) {
 		vp->v_flag |= VBWAIT;
-		FREE_LOCK_INTERLOCKED(&lk);
-		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0);
-		ACQUIRE_LOCK_INTERLOCKED(&lk);
+		interlocked_sleep(&lk, SLEEP, (caddr_t)&vp->v_numoutput,
+		    PRIBIO + 1, "drainvp", 0);
 	}
 	if (!islocked)
 		FREE_LOCK(&lk);
Index: ffs_vfsops.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_vfsops.c,v
retrieving revision 1.117.2.7
diff -u -r1.117.2.7 ffs_vfsops.c
--- ffs_vfsops.c	25 Dec 2001 01:44:44 -0000	1.117.2.7
+++ ffs_vfsops.c	15 Jan 2002 07:23:36 -0000
@@ -190,6 +190,14 @@
 		err = 0;
 		ronly = fs->fs_ronly;	/* MNT_RELOAD might change this */
 		if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+			/*
+			 * Flush any dirty data.
+			 */
+			VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
+			/*
+			 * Check for and optionally get rid of files open
+			 * for writing.
+			 */
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
Index: ffs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/ufs/ffs/ffs_vnops.c,v
retrieving revision 1.64
diff -u -r1.64 ffs_vnops.c
--- ffs_vnops.c	10 Jan 2000 12:04:25 -0000	1.64
+++ ffs_vnops.c	11 Jan 2002 18:05:14 -0000
@@ -106,6 +106,7 @@
 VNODEOP_SET(ffs_fifoop_opv_desc);
 
 #include <ufs/ufs/ufs_readwrite.c>
+#define VDRAINED    0x00800
 
 /*
  * Synch an open file.
@@ -248,7 +249,11 @@
 		splx(s);
 		if ((error = softdep_sync_metadata(ap)) != 0)
 			return (error);
+		if (!vn_isdisk(vp, NULL) && vp->v_numoutput > 0)
+			panic("ffs_fsync: fsync failed 6");
 		s = splbio();
+		if (!vn_isdisk(vp, NULL) && vp->v_numoutput > 0)
+			panic("ffs_fsync: fsync failed 7");
 
 		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
 			/*
@@ -263,12 +268,26 @@
 				passes -= 1;
 				goto loop;
 			}
-#ifdef DIAGNOSTIC
-			if (!vn_isdisk(vp, NULL))
+			if (!vn_isdisk(vp, NULL)) {
 				vprint("ffs_fsync: dirty", vp);
-#endif
+				panic("ffs_fsync: fsync failed 1");
+			}
 		}
+		if (!vn_isdisk(vp, NULL) && vp->v_numoutput > 0)
+			panic("ffs_fsync: fsync failed 5");
+		vp->v_flag |= VDRAINED;
 	}
+	if (wait && !vn_isdisk(vp, NULL) &&
+	   (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd)))
+		panic("ffs_fsync: fsync failed 4");
 	splx(s);
-	return (UFS_UPDATE(vp, wait));
+	if (wait && !vn_isdisk(vp, NULL) &&
+	   (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd)))
+		panic("ffs_fsync: fsync failed 2");
+	vp->v_flag &=~ VDRAINED;
+	error = UFS_UPDATE(vp, wait);
+	if (wait && !vn_isdisk(vp, NULL) &&
+	   (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd)))
+		panic("ffs_fsync: fsync failed 3");
+	return (error);
 }

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-hackers" in the body of the message