From: Konstantin Belousov <kib@FreeBSD.org>
Date: Fri, 11 Nov 2016 20:18:09 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject: svn commit: r308554 - in stable/11/sys: kern sys ufs/ffs
Message-Id: <201611112018.uABKI9uh018732@repo.freebsd.org>
X-SVN-Group: stable-11

Author: kib
Date: Fri Nov 11 20:18:08 2016
New Revision: 308554
URL: https://svnweb.freebsd.org/changeset/base/308554

Log:
  MFC r308026:
  Generalize UFS buffer pager.

  MFC r308442:
  Tweaks for the buffer pager.

Modified:
  stable/11/sys/kern/vfs_bio.c
  stable/11/sys/sys/buf.h
  stable/11/sys/ufs/ffs/ffs_vnops.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/kern/vfs_bio.c
==============================================================================
--- stable/11/sys/kern/vfs_bio.c	Fri Nov 11 20:09:54 2016	(r308553)
+++ stable/11/sys/kern/vfs_bio.c	Fri Nov 11 20:18:08 2016	(r308554)
@@ -75,9 +75,10 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
-#include
 #include
+#include
+#include
+#include
 #include
 #include
 #include
@@ -4691,6 +4692,165 @@ bdata2bio(struct buf *bp, struct bio *bi
 	}
 }
 
+static int buf_pager_relbuf;
+SYSCTL_INT(_vfs, OID_AUTO, buf_pager_relbuf, CTLFLAG_RWTUN,
+    &buf_pager_relbuf, 0,
+    "Make buffer pager release buffers after reading");
+
+/*
+ * The buffer pager.  It uses buffer reads to validate pages.
+ *
+ * In contrast to the generic local pager from vm/vnode_pager.c, this
+ * pager correctly and easily handles volumes where the underlying
+ * device block size is greater than the machine page size.  The
+ * buffer cache transparently extends the requested page run to be
+ * aligned at the block boundary, and does the necessary bogus page
+ * replacements in the addends to avoid obliterating already valid
+ * pages.
+ *
+ * The only non-trivial issue is that the exclusive busy state for
+ * pages, which is assumed by the vm_pager_getpages() interface, is
+ * incompatible with the VMIO buffer cache's desire to share-busy the
+ * pages.  This function performs a trivial downgrade of the pages'
+ * state before reading buffers, and a less trivial upgrade from the
+ * shared-busy to excl-busy state after the read.
+ */
+int
+vfs_bio_getpages(struct vnode *vp, vm_page_t *ma, int count,
+    int *rbehind, int *rahead, vbg_get_lblkno_t get_lblkno,
+    vbg_get_blksize_t get_blksize)
+{
+	vm_page_t m;
+	vm_object_t object;
+	struct buf *bp;
+	struct mount *mp;
+	daddr_t lbn, lbnp;
+	vm_ooffset_t la, lb, poff, poffe;
+	long bsize;
+	int bo_bs, br_flags, error, i;
+	bool redo, lpart;
+
+	object = vp->v_object;
+	mp = vp->v_mount;
+	la = IDX_TO_OFF(ma[count - 1]->pindex);
+	if (la >= object->un_pager.vnp.vnp_size)
+		return (VM_PAGER_BAD);
+	lpart = la + PAGE_SIZE > object->un_pager.vnp.vnp_size;
+	bo_bs = get_blksize(vp, get_lblkno(vp, IDX_TO_OFF(ma[0]->pindex)));
+	if (rbehind != NULL) {
+		lb = IDX_TO_OFF(ma[0]->pindex);
+		*rbehind = OFF_TO_IDX(lb - rounddown2(lb, bo_bs));
+	}
+	if (rahead != NULL) {
+		*rahead = OFF_TO_IDX(roundup2(la, bo_bs) - la);
+		if (la + IDX_TO_OFF(*rahead) >= object->un_pager.vnp.vnp_size) {
+			*rahead = OFF_TO_IDX(roundup2(object->un_pager.
+			    vnp.vnp_size, PAGE_SIZE) - la);
+		}
+	}
+	br_flags = (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS)
+	    != 0) ? GB_UNMAPPED : 0;
+	VM_OBJECT_WLOCK(object);
+again:
+	for (i = 0; i < count; i++)
+		vm_page_busy_downgrade(ma[i]);
+	VM_OBJECT_WUNLOCK(object);
+
+	lbnp = -1;
+	for (i = 0; i < count; i++) {
+		m = ma[i];
+
+		/*
+		 * Pages are shared busy and the object lock is not
+		 * owned, which together allow for the pages'
+		 * invalidation.  The racy test for validity avoids
+		 * useless creation of the buffer for the most typical
+		 * case when invalidation is not used in redo or for
+		 * parallel read.  The shared->excl upgrade loop at
+		 * the end of the function catches the race in a
+		 * reliable way (protected by the object lock).
+		 */
+		if (m->valid == VM_PAGE_BITS_ALL)
+			continue;
+
+		poff = IDX_TO_OFF(m->pindex);
+		poffe = MIN(poff + PAGE_SIZE, object->un_pager.vnp.vnp_size);
+		for (; poff < poffe; poff += bsize) {
+			lbn = get_lblkno(vp, poff);
+			if (lbn == lbnp)
+				goto next_page;
+			lbnp = lbn;
+
+			bsize = get_blksize(vp, lbn);
+			error = bread_gb(vp, lbn, bsize, curthread->td_ucred,
+			    br_flags, &bp);
+			if (error != 0)
+				goto end_pages;
+			if (LIST_EMPTY(&bp->b_dep)) {
+				/*
+				 * Invalidation clears m->valid, but
+				 * may leave B_CACHE flag if the
+				 * buffer existed at the invalidation
+				 * time.  In this case, recycle the
+				 * buffer to do real read on next
+				 * bread() after redo.
+				 *
+				 * Otherwise B_RELBUF is not strictly
+				 * necessary, enable to reduce buf
+				 * cache pressure.
+				 */
+				if (buf_pager_relbuf ||
+				    m->valid != VM_PAGE_BITS_ALL)
+					bp->b_flags |= B_RELBUF;
+
+				bp->b_flags &= ~B_NOCACHE;
+				brelse(bp);
+			} else {
+				bqrelse(bp);
+			}
+		}
+		KASSERT(1 /* racy, enable for debugging */ ||
+		    m->valid == VM_PAGE_BITS_ALL || i == count - 1,
+		    ("buf %d %p invalid", i, m));
+		if (i == count - 1 && lpart) {
+			VM_OBJECT_WLOCK(object);
+			if (m->valid != 0 &&
+			    m->valid != VM_PAGE_BITS_ALL)
+				vm_page_zero_invalid(m, TRUE);
+			VM_OBJECT_WUNLOCK(object);
+		}
+next_page:;
+	}
+end_pages:
+
+	VM_OBJECT_WLOCK(object);
+	redo = false;
+	for (i = 0; i < count; i++) {
+		vm_page_sunbusy(ma[i]);
+		ma[i] = vm_page_grab(object, ma[i]->pindex, VM_ALLOC_NORMAL);
+
+		/*
+		 * Since the pages were only sbusy while neither the
+		 * buffer nor the object lock was held by us, or
+		 * reallocated while vm_page_grab() slept for busy
+		 * relinquish, they could have been invalidated.
+		 * Recheck the valid bits and re-read as needed.
+		 *
+		 * Note that the last page is made fully valid in the
+		 * read loop, and partial validity for the page at
+		 * index count - 1 could mean that the page was
+		 * invalidated or removed, so we must restart for
+		 * safety as well.
+		 */
+		if (ma[i]->valid != VM_PAGE_BITS_ALL)
+			redo = true;
+	}
+	if (redo && error == 0)
+		goto again;
+	VM_OBJECT_WUNLOCK(object);
+	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
+}
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include
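To make the read-behind/read-ahead window computation above concrete, here is a minimal userspace sketch; rounddown2() and roundup2() are re-defined locally to mirror sys/param.h, and the block size and page offsets are made-up illustration values, not taken from the commit:

#include <stdio.h>

/* Local stand-ins for the sys/param.h power-of-2 helpers. */
#define rounddown2(x, y) ((x) & ~((y) - 1))
#define roundup2(x, y)   (((x) + ((y) - 1)) & ~((y) - 1))

#define PAGE_SIZE       4096L

int
main(void)
{
        /* Hypothetical request: 32K blocks, pages 10..12 of the file. */
        long bo_bs = 32768;
        long lb = 10 * PAGE_SIZE;       /* byte offset of ma[0] */
        long la = 12 * PAGE_SIZE;       /* byte offset of ma[count - 1] */

        /*
         * The same arithmetic as vfs_bio_getpages(): extend the
         * requested run backward and forward to the enclosing block
         * boundaries (the vnp_size clamp is omitted for brevity).
         */
        long rbehind = (lb - rounddown2(lb, bo_bs)) / PAGE_SIZE;
        long rahead = (roundup2(la, bo_bs) - la) / PAGE_SIZE;

        /* Prints: rbehind = 2 pages, rahead = 4 pages */
        printf("rbehind = %ld pages, rahead = %ld pages\n", rbehind, rahead);
        return (0);
}

OFF_TO_IDX() in the kernel code is the same division by PAGE_SIZE, expressed as a shift.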
Modified: stable/11/sys/sys/buf.h
==============================================================================
--- stable/11/sys/sys/buf.h	Fri Nov 11 20:09:54 2016	(r308553)
+++ stable/11/sys/sys/buf.h	Fri Nov 11 20:18:08 2016	(r308554)
@@ -68,6 +68,7 @@ extern struct bio_ops {
 } bioops;
 
 struct vm_object;
+struct vm_page;
 
 typedef unsigned char b_xflags_t;
 
@@ -547,6 +548,12 @@ void	bpin(struct buf *);
 void	bunpin(struct buf *);
 void	bunpin_wait(struct buf *);
 
+typedef daddr_t (vbg_get_lblkno_t)(struct vnode *, vm_ooffset_t);
+typedef int (vbg_get_blksize_t)(struct vnode *, daddr_t);
+int	vfs_bio_getpages(struct vnode *vp, struct vm_page **ma, int count,
+	    int *rbehind, int *rahead, vbg_get_lblkno_t get_lblkno,
+	    vbg_get_blksize_t get_blksize);
+
 #endif /* _KERNEL */
 
 #endif /* !_SYS_BUF_H_ */
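The two callback typedefs above are the entire porting surface for a filesystem that wants the buffer pager. As a hedged sketch only: suppose a hypothetical filesystem "myfs" with a single power-of-2 block size per mount; struct myfs_mount, its mm_bsize field, and all myfs_* names are invented for illustration, while vfs_bio_getpages() and the a_vp/a_m/a_count/a_rbehind/a_rahead VOP arguments are the real interfaces used in this commit. The ffs_vnops.c hunk below shows the real adoption for UFS/FFS:

/* Hypothetical glue for a fixed-block-size filesystem. */
static daddr_t
myfs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
{
	struct myfs_mount *mmp = vp->v_mount->mnt_data;

	/* Translate a byte offset into a logical block number. */
	return (off / mmp->mm_bsize);
}

static int
myfs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
{
	struct myfs_mount *mmp = vp->v_mount->mnt_data;

	/* Every block has the same size in this toy filesystem. */
	return (mmp->mm_bsize);
}

static int
myfs_getpages(struct vop_getpages_args *ap)
{

	/* Delegate the whole job to the generic buffer pager. */
	return (vfs_bio_getpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_rbehind, ap->a_rahead, myfs_gbp_getblkno,
	    myfs_gbp_getblksz));
}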
Modified: stable/11/sys/ufs/ffs/ffs_vnops.c
==============================================================================
--- stable/11/sys/ufs/ffs/ffs_vnops.c	Fri Nov 11 20:09:54 2016	(r308553)
+++ stable/11/sys/ufs/ffs/ffs_vnops.c	Fri Nov 11 20:18:08 2016	(r308554)
@@ -87,7 +87,6 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
-#include
 #include
 #include
 
@@ -1791,160 +1790,33 @@ SYSCTL_DECL(_vfs_ffs);
 static int use_buf_pager = 0;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
     "Always use buffer pager instead of bmap");
-static int buf_pager_relbuf;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, buf_pager_relbuf, CTLFLAG_RWTUN,
-    &buf_pager_relbuf, 0,
-    "Make buffer pager release buffers after reading");
-
-/*
- * The FFS pager.  It uses buffer reads to validate pages.
- *
- * In contrast to the generic local pager from vm/vnode_pager.c, this
- * pager correctly and easily handles volumes where the underlying
- * device block size is greater than the machine page size.  The
- * buffer cache transparently extends the requested page run to be
- * aligned at the block boundary, and does the necessary bogus page
- * replacements in the addends to avoid obliterating already valid
- * pages.
- *
- * The only non-trivial issue is that the exclusive busy state for
- * pages, which is assumed by the vm_pager_getpages() interface, is
- * incompatible with the VMIO buffer cache's desire to share-busy the
- * pages.  This function performs a trivial downgrade of the pages'
- * state before reading buffers, and a less trivial upgrade from the
- * shared-busy to excl-busy state after the read.
- */
+static daddr_t
+ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
+{
+
+	return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
+}
+
+static int
+ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
+{
+
+	return (blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn));
+}
+
 static int
 ffs_getpages(struct vop_getpages_args *ap)
 {
 	struct vnode *vp;
-	vm_page_t *ma, m;
-	vm_object_t object;
-	struct buf *bp;
 	struct ufsmount *um;
-	ufs_lbn_t lbn, lbnp;
-	vm_ooffset_t la, lb;
-	long bsize;
-	int bo_bs, count, error, i;
-	bool redo, lpart;
 
 	vp = ap->a_vp;
-	ma = ap->a_m;
-	count = ap->a_count;
+	um = VFSTOUFS(vp->v_mount);
 
-	um = VFSTOUFS(ap->a_vp->v_mount);
-	bo_bs = um->um_devvp->v_bufobj.bo_bsize;
-	if (!use_buf_pager && bo_bs <= PAGE_SIZE)
-		return (vnode_pager_generic_getpages(vp, ma, count,
+	if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
+		return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
 		    ap->a_rbehind, ap->a_rahead, NULL, NULL));
-
-	object = vp->v_object;
-	la = IDX_TO_OFF(ma[count - 1]->pindex);
-	if (la >= object->un_pager.vnp.vnp_size)
-		return (VM_PAGER_BAD);
-	lpart = la + PAGE_SIZE > object->un_pager.vnp.vnp_size;
-	if (ap->a_rbehind != NULL) {
-		lb = IDX_TO_OFF(ma[0]->pindex);
-		*ap->a_rbehind = OFF_TO_IDX(lb - rounddown2(lb, bo_bs));
-	}
-	if (ap->a_rahead != NULL) {
-		*ap->a_rahead = OFF_TO_IDX(roundup2(la, bo_bs) - la);
-		if (la + IDX_TO_OFF(*ap->a_rahead) >=
-		    object->un_pager.vnp.vnp_size) {
-			*ap->a_rahead = OFF_TO_IDX(roundup2(object->un_pager.
-			    vnp.vnp_size, PAGE_SIZE) - la);
-		}
-	}
-	VM_OBJECT_WLOCK(object);
-again:
-	for (i = 0; i < count; i++)
-		vm_page_busy_downgrade(ma[i]);
-	VM_OBJECT_WUNLOCK(object);
-
-	lbnp = -1;
-	for (i = 0; i < count; i++) {
-		m = ma[i];
-
-		/*
-		 * Pages are shared busy and the object lock is not
-		 * owned, which together allow for the pages'
-		 * invalidation.  The racy test for validity avoids
-		 * useless creation of the buffer for the most typical
-		 * case when invalidation is not used in redo or for
-		 * parallel read.  The shared->excl upgrade loop at
-		 * the end of the function catches the race in a
-		 * reliable way (protected by the object lock).
-		 */
-		if (m->valid == VM_PAGE_BITS_ALL)
-			continue;
-
-		lbn = lblkno(um->um_fs, IDX_TO_OFF(m->pindex));
-		if (lbn != lbnp) {
-			bsize = blksize(um->um_fs, VTOI(vp), lbn);
-			error = bread_gb(vp, lbn, bsize, NOCRED, GB_UNMAPPED,
-			    &bp);
-			if (error != 0)
-				break;
-			KASSERT(1 /* racy, enable for debugging */ ||
-			    m->valid == VM_PAGE_BITS_ALL || i == count - 1,
-			    ("buf %d %p invalid", i, m));
-			if (i == count - 1 && lpart) {
-				VM_OBJECT_WLOCK(object);
-				if (m->valid != 0 &&
-				    m->valid != VM_PAGE_BITS_ALL)
-					vm_page_zero_invalid(m, TRUE);
-				VM_OBJECT_WUNLOCK(object);
-			}
-			if (LIST_EMPTY(&bp->b_dep)) {
-				/*
-				 * Invalidation clears m->valid, but
-				 * may leave B_CACHE flag if the
-				 * buffer existed at the invalidation
-				 * time.  In this case, recycle the
-				 * buffer to do real read on next
-				 * bread() after redo.
-				 *
-				 * Otherwise B_RELBUF is not strictly
-				 * necessary, enable to reduce buf
-				 * cache pressure.
-				 */
-				if (buf_pager_relbuf ||
-				    m->valid != VM_PAGE_BITS_ALL)
-					bp->b_flags |= B_RELBUF;
-
-				bp->b_flags &= ~B_NOCACHE;
-				brelse(bp);
-			} else {
-				bqrelse(bp);
-			}
-			lbnp = lbn;
-		}
-	}
-
-	VM_OBJECT_WLOCK(object);
-	redo = false;
-	for (i = 0; i < count; i++) {
-		vm_page_sunbusy(ma[i]);
-		ma[i] = vm_page_grab(object, ma[i]->pindex, VM_ALLOC_NORMAL);
-
-		/*
-		 * Since the pages were only sbusy while neither the
-		 * buffer nor the object lock was held by us, or
-		 * reallocated while vm_page_grab() slept for busy
-		 * relinquish, they could have been invalidated.
-		 * Recheck the valid bits and re-read as needed.
-		 *
-		 * Note that the last page is made fully valid in the
-		 * read loop, and partial validity for the page at
-		 * index count - 1 could mean that the page was
-		 * invalidated or removed, so we must restart for
-		 * safety as well.
-		 */
-		if (ma[i]->valid != VM_PAGE_BITS_ALL)
-			redo = true;
-	}
-	if (redo && error == 0)
-		goto again;
-	VM_OBJECT_WUNLOCK(object);
-	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
+	return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
+	    ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
 }
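Both knobs touched by this change are CTLFLAG_RWTUN, so they can be set as loader tunables or flipped on a running system. A small userspace sketch using the standard sysctlbyname(3) interface to inspect vfs.buf_pager_relbuf and enable vfs.ffs.use_buf_pager (the write requires root; error handling kept minimal):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int relbuf, on = 1;
	size_t len = sizeof(relbuf);

	/* Read the current value of the pager's B_RELBUF knob. */
	if (sysctlbyname("vfs.buf_pager_relbuf", &relbuf, &len,
	    NULL, 0) == -1) {
		perror("vfs.buf_pager_relbuf");
		return (1);
	}
	printf("vfs.buf_pager_relbuf = %d\n", relbuf);

	/* Ask FFS to use the buffer pager even when the device
	 * block size does not exceed PAGE_SIZE. */
	if (sysctlbyname("vfs.ffs.use_buf_pager", NULL, NULL,
	    &on, sizeof(on)) == -1)
		perror("vfs.ffs.use_buf_pager");
	return (0);
}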