Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 11 Sep 2019 21:24:15 +0000 (UTC)
From:      Conrad Meyer <cem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r352233 - in head: sys/fs/msdosfs sys/kern sys/sys usr.sbin/makefs/msdos
Message-ID:  <201909112124.x8BLOFtt051601@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: cem
Date: Wed Sep 11 21:24:14 2019
New Revision: 352233
URL: https://svnweb.freebsd.org/changeset/base/352233

Log:
  buf: Add B_INVALONERR flag to discard data
  
  Setting the B_INVALONERR flag before a synchronous write causes the buf
  cache to forcibly invalidate contents if the write fails (BIO_ERROR).
  
  This is intended to be used to allow layers above the buffer cache to make
  more informed decisions about when discarding dirty buffers without
  successful write is acceptable.
  
  As a proof of concept, use in msdosfs to handle failures to mark the on-disk
  'dirty' bit during rw mount or ro->rw update.
  
  Extending this to other filesystems is left as future work.
  
  PR:		210316
  Reviewed by:	kib (with objections)
  Sponsored by:	Dell EMC Isilon
  Differential Revision:	https://reviews.freebsd.org/D21539

Modified:
  head/sys/fs/msdosfs/fat.h
  head/sys/fs/msdosfs/msdosfs_fat.c
  head/sys/fs/msdosfs/msdosfs_vfsops.c
  head/sys/kern/vfs_bio.c
  head/sys/sys/buf.h
  head/usr.sbin/makefs/msdos/msdosfs_denode.c
  head/usr.sbin/makefs/msdos/msdosfs_fat.c
  head/usr.sbin/makefs/msdos/msdosfs_lookup.c
  head/usr.sbin/makefs/msdos/msdosfs_vfsops.c
  head/usr.sbin/makefs/msdos/msdosfs_vnops.c

Modified: head/sys/fs/msdosfs/fat.h
==============================================================================
--- head/sys/fs/msdosfs/fat.h	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/sys/fs/msdosfs/fat.h	Wed Sep 11 21:24:14 2019	(r352233)
@@ -103,7 +103,13 @@ int fatentry(int function, struct msdosfsmount *pmp, u
 int freeclusterchain(struct msdosfsmount *pmp, u_long startchain);
 int extendfile(struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, int flags);
 void fc_purge(struct denode *dep, u_int frcn);
-int markvoldirty(struct msdosfsmount *pmp, int dirty);
+int markvoldirty_upgrade(struct msdosfsmount *pmp, bool dirty, bool rw_upgrade);
+
+static inline int
+markvoldirty(struct msdosfsmount *pmp, bool dirty)
+{
+	return (markvoldirty_upgrade(pmp, dirty, false));
+}
 
 #endif	/* _KERNEL || MAKEFS */
 #endif	/* !_FS_MSDOSFS_FAT_H_ */

Modified: head/sys/fs/msdosfs/msdosfs_fat.c
==============================================================================
--- head/sys/fs/msdosfs/msdosfs_fat.c	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/sys/fs/msdosfs/msdosfs_fat.c	Wed Sep 11 21:24:14 2019	(r352233)
@@ -1117,7 +1117,7 @@ extendfile(struct denode *dep, u_long count, struct bu
  *	?	(other errors from called routines)
  */
 int
-markvoldirty(struct msdosfsmount *pmp, int dirty)
+markvoldirty_upgrade(struct msdosfsmount *pmp, bool dirty, bool rw_upgrade)
 {
 	struct buf *bp;
 	u_long bn, bo, bsize, byteoffset, fatval;
@@ -1130,8 +1130,11 @@ markvoldirty(struct msdosfsmount *pmp, int dirty)
 	if (FAT12(pmp))
 		return (0);
 
-	/* Can't change the bit on a read-only filesystem. */
-	if (pmp->pm_flags & MSDOSFSMNT_RONLY)
+	/*
+	 * Can't change the bit on a read-only filesystem, except as part of
+	 * ro->rw upgrade.
+	 */
+	if ((pmp->pm_flags & MSDOSFSMNT_RONLY) != 0 && !rw_upgrade)
 		return (EROFS);
 
 	/*
@@ -1166,6 +1169,29 @@ markvoldirty(struct msdosfsmount *pmp, int dirty)
 			fatval |= 0x8000;
 		putushort(&bp->b_data[bo], fatval);
 	}
+
+	/*
+	 * The concern here is that a devvp may be readonly, without reporting
+	 * itself as such through the usual channels.  In that case, we'd like
+	 * it if attempting to mount msdosfs rw didn't panic the system.
+	 *
+	 * markvoldirty is invoked as the first write on backing devvps when
+	 * either msdosfs is mounted for the first time, or a ro mount is
+	 * upgraded to rw.
+	 *
+	 * In either event, if a write error occurs dirtying the volume:
+	 *   - No user data has been permitted to be written to cache yet.
+	 *   - We can abort the high-level operation (mount, or ro->rw) safely.
+	 *   - We don't derive any benefit from leaving a zombie dirty buf in
+	 *   the cache that can not be cleaned or evicted.
+	 *
+	 * So, mark B_INVALONERR to have bwrite() -> brelse() detect that
+	 * condition and force-invalidate our write to the block if it occurs.
+	 *
+	 * PR 210316 provides more context on the discovery and diagnosis of
+	 * the problem, as well as earlier attempts to solve it.
+	 */
+	bp->b_flags |= B_INVALONERR;
 
 	/* Write out the modified FAT block synchronously. */
 	return (bwrite(bp));

Modified: head/sys/fs/msdosfs/msdosfs_vfsops.c
==============================================================================
--- head/sys/fs/msdosfs/msdosfs_vfsops.c	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/sys/fs/msdosfs/msdosfs_vfsops.c	Wed Sep 11 21:24:14 2019	(r352233)
@@ -311,16 +311,25 @@ msdosfs_mount(struct mount *mp)
 			if (error)
 				return (error);
 
+			/* Now that the volume is modifiable, mark it dirty. */
+			error = markvoldirty_upgrade(pmp, true, true);
+			if (error) {
+				/*
+				 * If dirtying the superblock failed, drop GEOM
+				 * 'w' refs (we're still RO).
+				 */
+				g_topology_lock();
+				(void)g_access(pmp->pm_cp, 0, -1, 0);
+				g_topology_unlock();
+
+				return (error);
+			}
+
 			pmp->pm_fmod = 1;
 			pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
 			MNT_ILOCK(mp);
 			mp->mnt_flag &= ~MNT_RDONLY;
 			MNT_IUNLOCK(mp);
-
-			/* Now that the volume is modifiable, mark it dirty. */
-			error = markvoldirty(pmp, 1);
-			if (error)
-				return (error);
 		}
 	}
 	/*
@@ -701,10 +710,8 @@ mountmsdosfs(struct vnode *devvp, struct mount *mp)
 	if (ronly)
 		pmp->pm_flags |= MSDOSFSMNT_RONLY;
 	else {
-		if ((error = markvoldirty(pmp, 1)) != 0) {
-			(void)markvoldirty(pmp, 0);
+		if ((error = markvoldirty(pmp, 1)) != 0)
 			goto error_exit;
-		}
 		pmp->pm_fmod = 1;
 	}
 	mp->mnt_data =  pmp;

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/sys/kern/vfs_bio.c	Wed Sep 11 21:24:14 2019	(r352233)
@@ -2614,6 +2614,19 @@ brelse(struct buf *bp)
 		BO_UNLOCK(bp->b_bufobj);
 		bdirty(bp);
 	}
+
+	if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
+	    (bp->b_flags & B_INVALONERR)) {
+		/*
+		 * Forced invalidation of dirty buffer contents, to be used
+		 * after a failed write in the rare case that the loss of the
+		 * contents is acceptable.  The buffer is invalidated and
+		 * freed.
+		 */
+		bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+		bp->b_flags &= ~(B_ASYNC | B_CACHE);
+	}
+
 	if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
 	    (bp->b_error != ENXIO || !LIST_EMPTY(&bp->b_dep)) &&
 	    !(bp->b_flags & B_INVAL)) {

Modified: head/sys/sys/buf.h
==============================================================================
--- head/sys/sys/buf.h	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/sys/sys/buf.h	Wed Sep 11 21:24:14 2019	(r352233)
@@ -196,6 +196,11 @@ struct buf {
  *			may not be used with the stage 1 data write under NFS
  *			but may be used for the commit rpc portion.
  *
+ *	B_INVALONERR	This flag is set on dirty buffers.  It specifies that a
+ *			write error should forcibly invalidate the buffer
+ *			contents.  This flag should be used with caution, as it
+ *			discards data.  It is incompatible with B_ASYNC.
+ *
  *	B_VMIO		Indicates that the buffer is tied into an VM object.
  *			The buffer's data is always PAGE_SIZE aligned even
  *			if b_bufsize and b_bcount are not.  ( b_bufsize is 
@@ -226,7 +231,7 @@ struct buf {
 #define	B_NOCACHE	0x00008000	/* Do not cache block after use. */
 #define	B_MALLOC	0x00010000	/* malloced b_data */
 #define	B_CLUSTEROK	0x00020000	/* Pagein op, so swap() can count it. */
-#define	B_00040000	0x00040000	/* Available flag. */
+#define	B_INVALONERR	0x00040000	/* Invalidate on write error. */
 #define	B_00080000	0x00080000	/* Available flag. */
 #define	B_00100000	0x00100000	/* Available flag. */
 #define	B_00200000	0x00200000	/* Available flag. */
@@ -243,7 +248,7 @@ struct buf {
 
 #define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
 	"\33paging\32infreecnt\31nocopy\30b23\27relbuf\26b21\25b20" \
-	"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
+	"\24b19\23invalonerr\22clusterok\21malloc\20nocache\17b14\16inval" \
 	"\15reuse\14noreuse\13eintr\12done\11b8\10delwri" \
 	"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
 

Modified: head/usr.sbin/makefs/msdos/msdosfs_denode.c
==============================================================================
--- head/usr.sbin/makefs/msdos/msdosfs_denode.c	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/usr.sbin/makefs/msdos/msdosfs_denode.c	Wed Sep 11 21:24:14 2019	(r352233)
@@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/errno.h>
 #include <sys/vnode.h>
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>

Modified: head/usr.sbin/makefs/msdos/msdosfs_fat.c
==============================================================================
--- head/usr.sbin/makefs/msdos/msdosfs_fat.c	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/usr.sbin/makefs/msdos/msdosfs_fat.c	Wed Sep 11 21:24:14 2019	(r352233)
@@ -54,6 +54,7 @@
 #include <sys/errno.h>
 
 #include <assert.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 #include <strings.h>

Modified: head/usr.sbin/makefs/msdos/msdosfs_lookup.c
==============================================================================
--- head/usr.sbin/makefs/msdos/msdosfs_lookup.c	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/usr.sbin/makefs/msdos/msdosfs_lookup.c	Wed Sep 11 21:24:14 2019	(r352233)
@@ -53,6 +53,7 @@
 #include <sys/param.h>
 #include <sys/errno.h>
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 

Modified: head/usr.sbin/makefs/msdos/msdosfs_vfsops.c
==============================================================================
--- head/usr.sbin/makefs/msdos/msdosfs_vfsops.c	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/usr.sbin/makefs/msdos/msdosfs_vfsops.c	Wed Sep 11 21:24:14 2019	(r352233)
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mount.h>
 
 #include <errno.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>

Modified: head/usr.sbin/makefs/msdos/msdosfs_vnops.c
==============================================================================
--- head/usr.sbin/makefs/msdos/msdosfs_vnops.c	Wed Sep 11 20:13:38 2019	(r352232)
+++ head/usr.sbin/makefs/msdos/msdosfs_vnops.c	Wed Sep 11 21:24:14 2019	(r352233)
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/time.h>
 
 #include <fcntl.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <string.h>
 #include <time.h>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201909112124.x8BLOFtt051601>