Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 14 Dec 2012 06:03:39 +0200
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Rick Macklem <rmacklem@uoguelph.ca>
Cc:        Tim Kientzle <kientzle@freebsd.org>, freebsd-current Current <freebsd-current@freebsd.org>
Subject:   Re: r244036 kernel hangs under load.
Message-ID:  <20121214040339.GI71906@kib.kiev.ua>
In-Reply-To: <1783866028.1396164.1355454869890.JavaMail.root@erie.cs.uoguelph.ca>
References:  <20121213043132.GB71906@kib.kiev.ua> <1783866028.1396164.1355454869890.JavaMail.root@erie.cs.uoguelph.ca>

next in thread | previous in thread | raw e-mail | index | archive | help

--jaTU8Y2VLE5tlY1O
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Thu, Dec 13, 2012 at 10:14:29PM -0500, Rick Macklem wrote:
> Good work. This patch seems to have done the trick. I've run
> quite a few kernel build cycles without a hang. I'll keep running
> them, but I would have expected to see a hang by now.
>=20
> Maybe Tim can test the patch as well?
> (I needed to add #include <sys/smp.h> btw.)

Below is the current version of the patch, it is being tested by Peter Holm.
Compared to what I sent you earlier, it contain a bug fix only relevant if
you use quotas.

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 67e078d..64d75fb 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/reboot.h>
 #include <sys/sched.h>
 #include <sys/sleepqueue.h>
+#include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
@@ -4710,32 +4711,54 @@ __mnt_vnode_markerfree_all(struct vnode **mvp, stru=
ct mount *mp)
  * These are helper functions for filesystems to traverse their
  * active vnodes.  See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
  */
-struct vnode *
-__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+static void
+mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
+{
+
+	KASSERT((*mvp)->v_mount =3D=3D mp, ("marker vnode mount list mismatch"));
+
+	MNT_ILOCK(mp);
+	MNT_REL(mp);
+	MNT_IUNLOCK(mp);
+	free(*mvp, M_VNODE_MARKER);
+	*mvp =3D NULL;
+}
+
+#ifdef SMP
+#define	ALWAYS_YIELD	(mp_ncpus =3D=3D 1)
+#else
+#define	ALWAYS_YIELD	1
+#endif
+
+static struct vnode *
+mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
 {
 	struct vnode *vp, *nvp;
=20
-	if (should_yield())
-		kern_yield(PRI_UNCHANGED);
-	mtx_lock(&vnode_free_list_mtx);
-restart:
+	mtx_assert(&vnode_free_list_mtx, MA_OWNED);
 	KASSERT((*mvp)->v_mount =3D=3D mp, ("marker vnode mount list mismatch"));
+restart:
 	vp =3D TAILQ_NEXT(*mvp, v_actfreelist);
+	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
 	while (vp !=3D NULL) {
 		if (vp->v_type =3D=3D VMARKER) {
 			vp =3D TAILQ_NEXT(vp, v_actfreelist);
 			continue;
 		}
 		if (!VI_TRYLOCK(vp)) {
-			if (should_yield()) {
+			if (ALWAYS_YIELD || should_yield()) {
+				TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
 				mtx_unlock(&vnode_free_list_mtx);
-				kern_yield(PRI_UNCHANGED);
+				kern_yield(PRI_USER);
 				mtx_lock(&vnode_free_list_mtx);
+				goto restart;
 			}
-			goto restart;
+			continue;
 		}
-		if (vp->v_mount =3D=3D mp && vp->v_type !=3D VMARKER &&
-		    (vp->v_iflag & VI_DOOMED) =3D=3D 0)
+		KASSERT(vp->v_type !=3D VMARKER, ("locked marker %p", vp));
+		KASSERT(vp->v_mount =3D=3D mp || vp->v_mount =3D=3D NULL,
+		    ("alien vnode on the active list %p %p", vp, mp));
+		if (vp->v_mount =3D=3D mp && (vp->v_iflag & VI_DOOMED) =3D=3D 0)
 			break;
 		nvp =3D TAILQ_NEXT(vp, v_actfreelist);
 		VI_UNLOCK(vp);
@@ -4745,22 +4768,31 @@ restart:
 	/* Check if we are done */
 	if (vp =3D=3D NULL) {
 		mtx_unlock(&vnode_free_list_mtx);
-		__mnt_vnode_markerfree_active(mvp, mp);
-		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
+		mnt_vnode_markerfree_active(mvp, mp);
 		return (NULL);
 	}
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
 	TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
 	mtx_unlock(&vnode_free_list_mtx);
 	ASSERT_VI_LOCKED(vp, "active iter");
 	KASSERT((vp->v_iflag & VI_ACTIVE) !=3D 0, ("Non-active vp %p", vp));
 	return (vp);
 }
+#undef ALWAYS_YIELD
+
+struct vnode *
+__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+{
+
+	if (should_yield())
+		kern_yield(PRI_UNCHANGED);
+	mtx_lock(&vnode_free_list_mtx);
+	return (mnt_vnode_next_active(mvp, mp));
+}
=20
 struct vnode *
 __mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
 {
-	struct vnode *vp, *nvp;
+	struct vnode *vp;
=20
 	*mvp =3D malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
 	MNT_ILOCK(mp);
@@ -4770,44 +4802,14 @@ __mnt_vnode_first_active(struct vnode **mvp, struct=
 mount *mp)
 	(*mvp)->v_mount =3D mp;
=20
 	mtx_lock(&vnode_free_list_mtx);
-restart:
 	vp =3D TAILQ_FIRST(&mp->mnt_activevnodelist);
-	while (vp !=3D NULL) {
-		if (vp->v_type =3D=3D VMARKER) {
-			vp =3D TAILQ_NEXT(vp, v_actfreelist);
-			continue;
-		}
-		if (!VI_TRYLOCK(vp)) {
-			if (should_yield()) {
-				mtx_unlock(&vnode_free_list_mtx);
-				kern_yield(PRI_UNCHANGED);
-				mtx_lock(&vnode_free_list_mtx);
-			}
-			goto restart;
-		}
-		if (vp->v_mount =3D=3D mp && vp->v_type !=3D VMARKER &&
-		    (vp->v_iflag & VI_DOOMED) =3D=3D 0)
-			break;
-		nvp =3D TAILQ_NEXT(vp, v_actfreelist);
-		VI_UNLOCK(vp);
-		vp =3D nvp;
-	}
-
-	/* Check if we are done */
 	if (vp =3D=3D NULL) {
 		mtx_unlock(&vnode_free_list_mtx);
-		MNT_ILOCK(mp);
-		MNT_REL(mp);
-		MNT_IUNLOCK(mp);
-		free(*mvp, M_VNODE_MARKER);
-		*mvp =3D NULL;
+		mnt_vnode_markerfree_active(mvp, mp);
 		return (NULL);
 	}
-	TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
-	mtx_unlock(&vnode_free_list_mtx);
-	ASSERT_VI_LOCKED(vp, "active iter first");
-	KASSERT((vp->v_iflag & VI_ACTIVE) !=3D 0, ("Non-active vp %p", vp));
-	return (vp);
+	TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
+	return (mnt_vnode_next_active(mvp, mp));
 }
=20
 void
@@ -4817,14 +4819,8 @@ __mnt_vnode_markerfree_active(struct vnode **mvp, st=
ruct mount *mp)
 	if (*mvp =3D=3D NULL)
 		return;
=20
-	KASSERT((*mvp)->v_mount =3D=3D mp, ("marker vnode mount list mismatch"));
-
 	mtx_lock(&vnode_free_list_mtx);
 	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
 	mtx_unlock(&vnode_free_list_mtx);
-	MNT_ILOCK(mp);
-	MNT_REL(mp);
-	MNT_IUNLOCK(mp);
-	free(*mvp, M_VNODE_MARKER);
-	*mvp =3D NULL;
+	mnt_vnode_markerfree_active(mvp, mp);
 }
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 4d010ec..ed2b002 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -199,8 +199,8 @@ struct vnode *__mnt_vnode_next_all(struct vnode **mvp, =
struct mount *mp);
 struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp);
 void          __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount =
*mp);
=20
-#define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \
-	for (vp =3D __mnt_vnode_first_all(&(mvp), (mp)); \
+#define MNT_VNODE_FOREACH_ALL(vp, mp, mvp)				\
+	for (vp =3D __mnt_vnode_first_all(&(mvp), (mp));			\
 		(vp) !=3D NULL; vp =3D __mnt_vnode_next_all(&(mvp), (mp)))
=20
 #define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp)				\
@@ -218,17 +218,12 @@ struct vnode *__mnt_vnode_next_active(struct vnode **=
mvp, struct mount *mp);
 struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *m=
p);
 void          __mnt_vnode_markerfree_active(struct vnode **mvp, struct mou=
nt *);
=20
-#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \
-	for (vp =3D __mnt_vnode_first_active(&(mvp), (mp)); \
+#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) 				\
+	for (vp =3D __mnt_vnode_first_active(&(mvp), (mp)); 		\
 		(vp) !=3D NULL; vp =3D __mnt_vnode_next_active(&(mvp), (mp)))
=20
 #define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp)				\
-	do {								\
-		MNT_ILOCK(mp);						\
-		__mnt_vnode_markerfree_active(&(mvp), (mp));		\
-		/* MNT_IUNLOCK(mp); -- done in above function */	\
-		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);			\
-	} while (0)
+	__mnt_vnode_markerfree_active(&(mvp), (mp))
=20
 /*
  * Definitions for MNT_VNODE_FOREACH.
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index 330f449..87ac9a1 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -1044,7 +1044,7 @@ again:
 		error =3D vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
 		if (error) {
 			if (error =3D=3D ENOENT) {
-				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
+				MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp);
 				goto again;
 			}
 			continue;

--jaTU8Y2VLE5tlY1O
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (FreeBSD)

iEYEARECAAYFAlDKpRcACgkQC3+MBN1Mb4ivmwCfZQN/MSCAKeSr11ZdS78ty+0O
sMUAnRt32B6sO4I9YgGpdH3U7TIULIHb
=+nFT
-----END PGP SIGNATURE-----

--jaTU8Y2VLE5tlY1O--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20121214040339.GI71906>