Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 3 Oct 2016 20:49:02 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Anton Yuzhaninov <citrin@citrin.ru>
Cc:        "freebsd-fs@freebsd.org" <freebsd-fs@freebsd.org>
Subject:   Re: UFS: unaligned read from GELI with 8k sectorsize
Message-ID:  <20161003174902.GR38409@kib.kiev.ua>
In-Reply-To: <b4a71a1b-d3c0-68a5-6999-e6e851f796c2@citrin.ru>
References:  <f84b069b-aeee-ff3c-d4f9-e2fe3caaddb1@citrin.ru> <20161001114536.GX38409@kib.kiev.ua> <20161001115439.GY38409@kib.kiev.ua> <68a8ed6d-e302-799c-3d2c-1d85c48d07bf@citrin.ru> <20161001211025.GD38409@kib.kiev.ua> <999638f9-3fee-82e3-d67f-cffef53b74e8@citrin.ru> <20161002191349.GH38409@kib.kiev.ua> <b4a71a1b-d3c0-68a5-6999-e6e851f796c2@citrin.ru>

next in thread | previous in thread | raw e-mail | index | archive | help
On Mon, Oct 03, 2016 at 01:14:15PM -0400, Anton Yuzhaninov wrote:
> With this patch I can exec various binaries from 8k geli when I boot 
> from a USB stick, but can't boot with root on this geli. Boot process 
> stops after init exec.
> 
> DDB bt for init:
> https://imgur.com/a/FXuzw
> ps in DDB shows init in running state (CPU2).

I made several bug-fixing and review passes over the patch; below is the
current version, which is hopefully more robust. BTW, do you have INVARIANTS
and perhaps WITNESS enabled in your kernel config?

diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 2af5383..66e8a8f 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/priv.h>
 #include <sys/rwlock.h>
 #include <sys/stat.h>
+#include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
@@ -118,14 +119,14 @@ static vop_listextattr_t	ffs_listextattr;
 static vop_openextattr_t	ffs_openextattr;
 static vop_setextattr_t	ffs_setextattr;
 static vop_vptofh_t	ffs_vptofh;
-
+static vop_getpages_t	ffs_getpages;
 
 /* Global vfs data structures for ufs. */
 struct vop_vector ffs_vnodeops1 = {
 	.vop_default =		&ufs_vnodeops,
 	.vop_fsync =		ffs_fsync,
 	.vop_fdatasync =	ffs_fdatasync,
-	.vop_getpages =		vnode_pager_local_getpages,
+	.vop_getpages =		ffs_getpages,
 	.vop_getpages_async =	vnode_pager_local_getpages_async,
 	.vop_lock1 =		ffs_lock,
 	.vop_read =		ffs_read,
@@ -147,7 +148,7 @@ struct vop_vector ffs_vnodeops2 = {
 	.vop_default =		&ufs_vnodeops,
 	.vop_fsync =		ffs_fsync,
 	.vop_fdatasync =	ffs_fdatasync,
-	.vop_getpages =		vnode_pager_local_getpages,
+	.vop_getpages =		ffs_getpages,
 	.vop_getpages_async =	vnode_pager_local_getpages_async,
 	.vop_lock1 =		ffs_lock,
 	.vop_read =		ffs_read,
@@ -1784,3 +1788,129 @@ vop_vptofh {
 	ufhp->ufid_gen = ip->i_gen;
 	return (0);
 }
+
+SYSCTL_DECL(_vfs_ffs);
+static int use_buf_pager;
+SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RW, &use_buf_pager, 0,
+    "always use buffer pager instead of bmap");
+
+/*
+ * VOP_GETPAGES implementation for FFS.
+ *
+ * Unless the use_buf_pager sysctl is set, a request on a device whose
+ * block size does not exceed PAGE_SIZE is handed off to
+ * vnode_pager_generic_getpages().  Otherwise every logical block covering
+ * a not fully valid page is read with bread_gb() and released at once;
+ * the passes repeat until all requested pages are fully valid.
+ *
+ * On the buffer path, returns VM_PAGER_BAD when the last page starts at
+ * or past the vnode pager size, VM_PAGER_ERROR when a block read fails,
+ * and VM_PAGER_OK otherwise.
+ */
+static int
+ffs_getpages(struct vop_getpages_args *ap)
+{
+	struct vnode *vp;
+	vm_page_t *mm, m;
+	vm_object_t object;
+	struct buf *bp;
+	struct ufsmount *um;
+	vm_pindex_t pi;
+	ufs_lbn_t lbn, lbnp;
+	long bsize;
+	int count, error, i;
+	bool redo;
+
+	vp = ap->a_vp;
+	mm = ap->a_m;
+	count = ap->a_count;
+
+	um = VFSTOUFS(ap->a_vp->v_mount);
+	if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
+		return (vnode_pager_generic_getpages(vp, mm, count,
+		    ap->a_rbehind, ap->a_rahead, NULL, NULL));
+
+	object = vp->v_object;
+	if (IDX_TO_OFF(mm[count - 1]->pindex) >= object->un_pager.vnp.vnp_size)
+		return (VM_PAGER_BAD);
+
+	/*
+	 * This path does not track pages read behind or ahead of the
+	 * request; report zero rather than leaving the caller's requested
+	 * counts in place.
+	 */
+	if (ap->a_rbehind != NULL)
+		*ap->a_rbehind = 0;
+	if (ap->a_rahead != NULL)
+		*ap->a_rahead = 0;
+
+	/*
+	 * The read loop skips pages that are already valid and may finish
+	 * without a single bread_gb() call, so error must start out clear.
+	 */
+	error = 0;
+	VM_OBJECT_WLOCK(object);
+again:
+	/* Swap each page's exclusive busy for a shared busy. */
+	for (i = 0; i < count; i++) {
+		m = mm[i];
+		vm_page_xunbusy(m);
+		vm_page_sbusy(m);
+	}
+	VM_OBJECT_WUNLOCK(object);
+
+	/* Read each logical block once; lbnp remembers the previous one. */
+	lbnp = -1;
+	for (i = 0; i < count; i++) {
+		m = mm[i];
+		if (m->valid == VM_PAGE_BITS_ALL)
+			continue;
+		lbn = lblkno(um->um_fs, IDX_TO_OFF(m->pindex));
+		if (lbn != lbnp) {
+			bsize = blksize(um->um_fs, VTOI(vp), lbn);
+			error = bread_gb(vp, lbn, bsize, NOCRED, GB_UNMAPPED,
+			    &bp);
+			if (error != 0)
+				break;
+			if (LIST_EMPTY(&bp->b_dep)) {
+				bp->b_flags |= B_RELBUF;
+				brelse(bp);
+			} else {
+				/* XXX */
+				bqrelse(bp);
+			}
+			lbnp = lbn;
+		}
+	}
+
+	VM_OBJECT_WLOCK(object);
+	redo = false;
+	for (i = 0; i < count; i++) {
+		m = mm[i];
+		if (error == 0) {
+			if (i == count - 1)
+				vm_page_zero_invalid(m, TRUE);
+			else
+				KASSERT(m->valid == VM_PAGE_BITS_ALL,
+				    ("run %d %p invalid", i, m));
+		}
+		/* Reacquire exclusive busy, waiting out other holders. */
+		vm_page_sunbusy(m);
+		while (vm_page_busied(m)) {
+			pi = m->pindex;
+			vm_page_lock(m);
+			VM_OBJECT_WUNLOCK(object);
+			vm_page_busy_sleep(m, "ffspgl");
+			VM_OBJECT_WLOCK(object);
+			/* The object was unlocked; look the page up again. */
+			m = vm_page_lookup(object, pi);
+			if (mm[i] != m)
+				mm[i] = m;
+		}
+		vm_page_xbusy(m);
+		if (m->valid != VM_PAGE_BITS_ALL)
+			redo = true;
+	}
+	if (redo && error == 0)
+		goto again;
+	VM_OBJECT_WUNLOCK(object);
+	return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
+}



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20161003174902.GR38409>