Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 28 Dec 2012 13:27:24 +0200
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Andreas Longwitz <longwitz@incore.de>
Cc:        pho@freebsd.org, freebsd-stable@freebsd.org, fs@freebsd.org
Subject:   Re: FS hang with suspfs when creating snapshot on a UFS + GJOURNAL setup
Message-ID:  <20121228112724.GR82219@kib.kiev.ua>
In-Reply-To: <50DD6423.5090305@incore.de>
References:  <50DC30F6.1050904@incore.de> <20121227133355.GI82219@kib.kiev.ua> <50DC8999.8000708@incore.de> <20121227194145.GM82219@kib.kiev.ua> <50DD6423.5090305@incore.de>

next in thread | previous in thread | raw e-mail | index | archive | help

--r7tUYVWcdYzDJoZW
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Fri, Dec 28, 2012 at 10:19:31AM +0100, Andreas Longwitz wrote:
> Konstantin Belousov wrote:
> >>> On Thu, Dec 27, 2012 at 12:28:54PM +0100, Andreas Longwitz wrote:
> >> db> alltrace (pid 18 and 7126)
> >>
> >> Tracing command g_journal switcher pid 18 tid 100076 td 0xffffff0002bd=
5000
> >> sched_switch() at sched_switch+0xde
> >> mi_switch() at mi_switch+0x186
> >> sleepq_wait() at sleepq_wait+0x42
> >> __lockmgr_args() at __lockmgr_args+0x49b
> >> ffs_copyonwrite() at ffs_copyonwrite+0x19a
> >> ffs_geom_strategy() at ffs_geom_strategy+0x1b5
> >> bufwrite() at bufwrite+0xe9
> >> ffs_sbupdate() at ffs_sbupdate+0x12a
> >> g_journal_ufs_clean() at g_journal_ufs_clean+0x3e
> >> g_journal_switcher() at g_journal_switcher+0xe5e
> >> fork_exit() at fork_exit+0x11f
> >> fork_trampoline() at fork_trampoline+0xe
> >> --- trap 0, rip =3D 0, rsp =3D 0xffffff8242ca8cf0, rbp =3D 0 ---
> >>
> >> Tracing command mksnap_ffs pid 7126 tid 100157 td 0xffffff000807a470
> >> sched_switch() at sched_switch+0xde
> >> mi_switch() at mi_switch+0x186
> >> sleepq_wait() at sleepq_wait+0x42
> >> _sleep() at _sleep+0x373
> >> vn_start_write() at vn_start_write+0xdf
> >> ffs_snapshot() at ffs_snapshot+0xe2b
> > Can you look up the line number for the ffs_snapshot+0xe2b ?
>=20
> (kgdb) list *ffs_snapshot+0xe2b
> 0xffffffff8056287b is in ffs_snapshot
> (/usr/src/sys/ufs/ffs/ffs_snapshot.c:676).
> 671    /*
> 672     * Resume operation on filesystem.
> 673     */
> 674    vfs_write_resume(vp->v_mount);
> 675    vn_start_write(NULL, &wrtmp, V_WAIT);
> 676    if (collectsnapstats && starttime.tv_sec > 0) {
> 677         nanotime(&endtime);
> 678         timespecsub(&endtime, &starttime);
> 679         printf("%s: suspended %ld.%03ld sec, redo %ld of %d\n",
> 680            vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
>=20
> > I think the bug is that vn_start_write() is called while the snaplock
> > is owned, after the out1 label in ffs_snapshot() (I am looking at the
> > HEAD code).
>=20
> You are right, the vn_start_write() is just after the out1 label.

Please try the following patch. It is against HEAD, might need some
adjustments for 8. I do the resume and write accounting atomically,
not allowing other suspension to intervent between.

diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 3f65b05..cf49ecb 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -1434,6 +1434,40 @@ vn_closefile(fp, td)
  * proceed. If a suspend request is in progress, we wait until the
  * suspension is over, and then proceed.
  */
+static int
+vn_start_write_locked(struct mount *mp, int flags)
+{
+	int error;
+
+	mtx_assert(MNT_MTX(mp), MA_OWNED);
+	error =3D 0;
+
+	/*
+	 * Check on status of suspension.
+	 */
+	if ((curthread->td_pflags & TDP_IGNSUSP) =3D=3D 0 ||
+	    mp->mnt_susp_owner !=3D curthread) {
+		while ((mp->mnt_kern_flag & MNTK_SUSPEND) !=3D 0) {
+			if (flags & V_NOWAIT) {
+				error =3D EWOULDBLOCK;
+				goto unlock;
+			}
+			error =3D msleep(&mp->mnt_flag, MNT_MTX(mp),
+			    (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
+			if (error)
+				goto unlock;
+		}
+	}
+	if (flags & V_XSLEEP)
+		goto unlock;
+	mp->mnt_writeopcount++;
+unlock:
+	if (error !=3D 0 || (flags & V_XSLEEP) !=3D 0)
+		MNT_REL(mp);
+	MNT_IUNLOCK(mp);
+	return (error);
+}
+
 int
 vn_start_write(vp, mpp, flags)
 	struct vnode *vp;
@@ -1470,30 +1504,7 @@ vn_start_write(vp, mpp, flags)
 	if (vp =3D=3D NULL)
 		MNT_REF(mp);
=20
-	/*
-	 * Check on status of suspension.
-	 */
-	if ((curthread->td_pflags & TDP_IGNSUSP) =3D=3D 0 ||
-	    mp->mnt_susp_owner !=3D curthread) {
-		while ((mp->mnt_kern_flag & MNTK_SUSPEND) !=3D 0) {
-			if (flags & V_NOWAIT) {
-				error =3D EWOULDBLOCK;
-				goto unlock;
-			}
-			error =3D msleep(&mp->mnt_flag, MNT_MTX(mp),
-			    (PUSER - 1) | (flags & PCATCH), "suspfs", 0);
-			if (error)
-				goto unlock;
-		}
-	}
-	if (flags & V_XSLEEP)
-		goto unlock;
-	mp->mnt_writeopcount++;
-unlock:
-	if (error !=3D 0 || (flags & V_XSLEEP) !=3D 0)
-		MNT_REL(mp);
-	MNT_IUNLOCK(mp);
-	return (error);
+	return (vn_start_write_locked(mp, flags));
 }
=20
 /*
@@ -1639,8 +1650,7 @@ vfs_write_suspend(mp)
  * Request a filesystem to resume write operations.
  */
 void
-vfs_write_resume(mp)
-	struct mount *mp;
+vfs_write_resume_flags(struct mount *mp, int flags)
 {
=20
 	MNT_ILOCK(mp);
@@ -1652,10 +1662,25 @@ vfs_write_resume(mp)
 		wakeup(&mp->mnt_writeopcount);
 		wakeup(&mp->mnt_flag);
 		curthread->td_pflags &=3D ~TDP_IGNSUSP;
+		if ((flags & VR_START_WRITE) !=3D 0) {
+			MNT_REF(mp);
+			mp->mnt_writeopcount++;
+		}
 		MNT_IUNLOCK(mp);
 		VFS_SUSP_CLEAN(mp);
-	} else
+	} else if ((flags & VR_START_WRITE) !=3D 0) {
+		MNT_REF(mp);
+		vn_start_write_locked(mp, 0);
+	} else {
 		MNT_IUNLOCK(mp);
+	}
+}
+
+void
+vfs_write_resume(struct mount *mp)
+{
+
+	vfs_write_resume_flags(mp, 0);
 }
=20
 /*
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 42f9e5f..4371b40 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -392,6 +392,8 @@ extern int		vttoif_tab[];
 #define	V_NOWAIT	0x0002	/* vn_start_write: don't sleep for suspend */
 #define	V_XSLEEP	0x0004	/* vn_start_write: just return after sleep */
=20
+#define	VR_START_WRITE	0x0001	/* vfs_write_resume: start write atomically =
*/
+
 #define	VREF(vp)	vref(vp)
=20
 #ifdef DIAGNOSTIC
@@ -701,6 +703,7 @@ int	vn_io_fault_uiomove(char *data, int xfersize, struc=
t uio *uio);
 int	vfs_cache_lookup(struct vop_lookup_args *ap);
 void	vfs_timestamp(struct timespec *);
 void	vfs_write_resume(struct mount *mp);
+void	vfs_write_resume_flags(struct mount *mp, int flags);
 int	vfs_write_suspend(struct mount *mp);
 int	vop_stdbmap(struct vop_bmap_args *);
 int	vop_stdfsync(struct vop_fsync_args *);
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index e528509..25ad79c 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -687,8 +687,7 @@ out1:
 	/*
 	 * Resume operation on filesystem.
 	 */
-	vfs_write_resume(vp->v_mount);
-	vn_start_write(NULL, &wrtmp, V_WAIT);
+	vfs_write_resume_flags(vp->v_mount, VR_START_WRITE);
 	if (collectsnapstats && starttime.tv_sec > 0) {
 		nanotime(&endtime);
 		timespecsub(&endtime, &starttime);

--r7tUYVWcdYzDJoZW
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (FreeBSD)

iQIcBAEBAgAGBQJQ3YIbAAoJEJDCuSvBvK1BencP/0168lx07OvV4T7toyComatr
aMz0nFicRqIkTzK52yDACCt8XMarnzV87EzkHfirRbtu4xwjxF27BHIgowDUCeDW
U4Cne6f9wt8zVX/9NkGYmbJAZ5Osuz22nAUz5AAd9H6S05IC4HJ/+ovsWw57riEU
ljWNkCS+tRs8uFjAK6aKUiE7c5eoaL+IXVCrdAVW2Puc48eJ9B6dl9NesYdbCqEs
RkTD6RyG0qdHPEsBpYmyxq+uklJVlrm3uxEzKhbmbCEcfrwynXvb/wR0d4Kfmyb2
tje334D/C7+nv6oITdMOqfkdbBDJuKGMR9aWh+8lkuCgZnK7SLC3tYsEOGhk3EBy
/mo1W7DpcWu2qciEDsJDBCQQ1r8ZJXFT8Rtx0c2KzioNbsGriTiVkcghJBMzvUMi
nNbUkoGu/37w9J4t+kxbDgLtRiPI9P5YfOEUHJgGnVP3o/4JQkmcDnDiKZnw6NhF
s1qXtk7pVFHgrJ2sN40NkX2rIZvE4Ih/Ueti83UWmWwr9p9sF/O/0TIDXMmImNAa
MYnGvTx8pPgmXFjfGKkAc2EYG24PAuXhSD1oXZjY+z2m3UKgXpQ01oCLv2i6kkRL
XCSuN6b/vQ6VyDvUCnZt1u+gbg7hgiM3htHnXHjDlRu/QaY/EiJHcUFWljpClb63
Po93TZJCvHxewZsuu1Kf
=EJ0n
-----END PGP SIGNATURE-----

--r7tUYVWcdYzDJoZW--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20121228112724.GR82219>