Date: Fri, 14 Dec 2012 06:03:39 +0200 From: Konstantin Belousov <kostikbel@gmail.com> To: Rick Macklem <rmacklem@uoguelph.ca> Cc: Tim Kientzle <kientzle@freebsd.org>, freebsd-current Current <freebsd-current@freebsd.org> Subject: Re: r244036 kernel hangs under load. Message-ID: <20121214040339.GI71906@kib.kiev.ua> In-Reply-To: <1783866028.1396164.1355454869890.JavaMail.root@erie.cs.uoguelph.ca> References: <20121213043132.GB71906@kib.kiev.ua> <1783866028.1396164.1355454869890.JavaMail.root@erie.cs.uoguelph.ca>
next in thread | previous in thread | raw e-mail | index | archive | help
--jaTU8Y2VLE5tlY1O Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Thu, Dec 13, 2012 at 10:14:29PM -0500, Rick Macklem wrote: > Good work. This patch seems to have done the trick. I've run > quite a few kernel build cycles without a hang. I'll keep running > them, but I would have expected to see a hang by now. >=20 > Maybe Tim can test the patch as well? > (I needed to add #include <sys/smp.h> btw.) Below is the current version of the patch, it is being tested by Peter Holm. Compared to what I sent you earlier, it contain a bug fix only relevant if you use quotas. diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 67e078d..64d75fb 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include <sys/reboot.h> #include <sys/sched.h> #include <sys/sleepqueue.h> +#include <sys/smp.h> #include <sys/stat.h> #include <sys/sysctl.h> #include <sys/syslog.h> @@ -4710,32 +4711,54 @@ __mnt_vnode_markerfree_all(struct vnode **mvp, stru= ct mount *mp) * These are helper functions for filesystems to traverse their * active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h */ -struct vnode * -__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) +static void +mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp) +{ + + KASSERT((*mvp)->v_mount =3D=3D mp, ("marker vnode mount list mismatch")); + + MNT_ILOCK(mp); + MNT_REL(mp); + MNT_IUNLOCK(mp); + free(*mvp, M_VNODE_MARKER); + *mvp =3D NULL; +} + +#ifdef SMP +#define ALWAYS_YIELD (mp_ncpus =3D=3D 1) +#else +#define ALWAYS_YIELD 1 +#endif + +static struct vnode * +mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) { struct vnode *vp, *nvp; =20 - if (should_yield()) - kern_yield(PRI_UNCHANGED); - mtx_lock(&vnode_free_list_mtx); -restart: + mtx_assert(&vnode_free_list_mtx, MA_OWNED); KASSERT((*mvp)->v_mount =3D=3D mp, ("marker vnode mount list mismatch")); +restart: vp =3D TAILQ_NEXT(*mvp, v_actfreelist); + TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); while (vp !=3D NULL) { if (vp->v_type =3D=3D VMARKER) { vp =3D TAILQ_NEXT(vp, v_actfreelist); continue; } if (!VI_TRYLOCK(vp)) { - if (should_yield()) { + if (ALWAYS_YIELD || should_yield()) { + TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); - kern_yield(PRI_UNCHANGED); + kern_yield(PRI_USER); mtx_lock(&vnode_free_list_mtx); + goto restart; } - goto restart; + continue; } - if (vp->v_mount =3D=3D mp && vp->v_type !=3D VMARKER && - (vp->v_iflag & VI_DOOMED) =3D=3D 0) + KASSERT(vp->v_type !=3D VMARKER, ("locked marker %p", vp)); + KASSERT(vp->v_mount =3D=3D mp || vp->v_mount =3D=3D NULL, + ("alien vnode on the active list %p %p", vp, mp)); + if (vp->v_mount =3D=3D mp && (vp->v_iflag & VI_DOOMED) =3D=3D 0) break; nvp =3D TAILQ_NEXT(vp, v_actfreelist); VI_UNLOCK(vp); @@ -4745,22 +4768,31 @@ restart: /* Check if we are done */ if (vp =3D=3D NULL) { mtx_unlock(&vnode_free_list_mtx); - __mnt_vnode_markerfree_active(mvp, mp); - mtx_assert(MNT_MTX(mp), MA_NOTOWNED); + mnt_vnode_markerfree_active(mvp, mp); return (NULL); } - TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); ASSERT_VI_LOCKED(vp, "active iter"); KASSERT((vp->v_iflag & VI_ACTIVE) !=3D 0, ("Non-active vp %p", vp)); return (vp); } +#undef ALWAYS_YIELD + +struct vnode * +__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp) +{ + + if (should_yield()) + kern_yield(PRI_UNCHANGED); + mtx_lock(&vnode_free_list_mtx); + return (mnt_vnode_next_active(mvp, mp)); +} =20 struct vnode * __mnt_vnode_first_active(struct vnode **mvp, struct mount *mp) { - struct vnode *vp, *nvp; + struct vnode *vp; =20 *mvp =3D malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO); MNT_ILOCK(mp); @@ -4770,44 +4802,14 @@ __mnt_vnode_first_active(struct vnode **mvp, struct= mount *mp) (*mvp)->v_mount =3D mp; =20 mtx_lock(&vnode_free_list_mtx); -restart: vp =3D TAILQ_FIRST(&mp->mnt_activevnodelist); - while (vp !=3D NULL) { - if (vp->v_type =3D=3D VMARKER) { - vp =3D TAILQ_NEXT(vp, v_actfreelist); - continue; - } - if (!VI_TRYLOCK(vp)) { - if (should_yield()) { - mtx_unlock(&vnode_free_list_mtx); - kern_yield(PRI_UNCHANGED); - mtx_lock(&vnode_free_list_mtx); - } - goto restart; - } - if (vp->v_mount =3D=3D mp && vp->v_type !=3D VMARKER && - (vp->v_iflag & VI_DOOMED) =3D=3D 0) - break; - nvp =3D TAILQ_NEXT(vp, v_actfreelist); - VI_UNLOCK(vp); - vp =3D nvp; - } - - /* Check if we are done */ if (vp =3D=3D NULL) { mtx_unlock(&vnode_free_list_mtx); - MNT_ILOCK(mp); - MNT_REL(mp); - MNT_IUNLOCK(mp); - free(*mvp, M_VNODE_MARKER); - *mvp =3D NULL; + mnt_vnode_markerfree_active(mvp, mp); return (NULL); } - TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist); - mtx_unlock(&vnode_free_list_mtx); - ASSERT_VI_LOCKED(vp, "active iter first"); - KASSERT((vp->v_iflag & VI_ACTIVE) !=3D 0, ("Non-active vp %p", vp)); - return (vp); + TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist); + return (mnt_vnode_next_active(mvp, mp)); } =20 void @@ -4817,14 +4819,8 @@ __mnt_vnode_markerfree_active(struct vnode **mvp, st= ruct mount *mp) if (*mvp =3D=3D NULL) return; =20 - KASSERT((*mvp)->v_mount =3D=3D mp, ("marker vnode mount list mismatch")); - mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist); mtx_unlock(&vnode_free_list_mtx); - MNT_ILOCK(mp); - MNT_REL(mp); - MNT_IUNLOCK(mp); - free(*mvp, M_VNODE_MARKER); - *mvp =3D NULL; + mnt_vnode_markerfree_active(mvp, mp); } diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 4d010ec..ed2b002 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -199,8 +199,8 @@ struct vnode *__mnt_vnode_next_all(struct vnode **mvp, = struct mount *mp); struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp); void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount = *mp); =20 -#define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \ - for (vp =3D __mnt_vnode_first_all(&(mvp), (mp)); \ +#define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \ + for (vp =3D __mnt_vnode_first_all(&(mvp), (mp)); \ (vp) !=3D NULL; vp =3D __mnt_vnode_next_all(&(mvp), (mp))) =20 #define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp) \ @@ -218,17 +218,12 @@ struct vnode *__mnt_vnode_next_active(struct vnode **= mvp, struct mount *mp); struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *m= p); void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mou= nt *); =20 -#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \ - for (vp =3D __mnt_vnode_first_active(&(mvp), (mp)); \ +#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \ + for (vp =3D __mnt_vnode_first_active(&(mvp), (mp)); \ (vp) !=3D NULL; vp =3D __mnt_vnode_next_active(&(mvp), (mp))) =20 #define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) \ - do { \ - MNT_ILOCK(mp); \ - __mnt_vnode_markerfree_active(&(mvp), (mp)); \ - /* MNT_IUNLOCK(mp); -- done in above function */ \ - mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \ - } while (0) + __mnt_vnode_markerfree_active(&(mvp), (mp)) =20 /* * Definitions for MNT_VNODE_FOREACH. diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c index 330f449..87ac9a1 100644 --- a/sys/ufs/ufs/ufs_quota.c +++ b/sys/ufs/ufs/ufs_quota.c @@ -1044,7 +1044,7 @@ again: error =3D vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td); if (error) { if (error =3D=3D ENOENT) { - MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); + MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp); goto again; } continue; --jaTU8Y2VLE5tlY1O Content-Type: application/pgp-signature -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.19 (FreeBSD) iEYEARECAAYFAlDKpRcACgkQC3+MBN1Mb4ivmwCfZQN/MSCAKeSr11ZdS78ty+0O sMUAnRt32B6sO4I9YgGpdH3U7TIULIHb =+nFT -----END PGP SIGNATURE----- --jaTU8Y2VLE5tlY1O--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20121214040339.GI71906>