Date: Thu, 09 May 2013 14:11:44 +0200 From: Goran Lowkrantz <goran.lowkrantz@ismobile.com> To: Konstantin Belousov <kostikbel@gmail.com> Cc: pho@freebsd.org, freebsd-stable@freebsd.org Subject: Re: Nullfs leaks i-nodes Message-ID: <BFD5A40BC867D1DA2AAEFF16@[10.255.253.2]> In-Reply-To: <20130508091317.GJ3047@kib.kiev.ua> References: <B799E3B928B18B9E6C68F912@[172.16.2.62]> <20130508091317.GJ3047@kib.kiev.ua>
next in thread | previous in thread | raw e-mail | index | archive | help
--On Wednesday, 08 May, 2013 12:13 PM +0300 Konstantin Belousov <kostikbel@gmail.com> wrote: > On Tue, May 07, 2013 at 08:30:06AM +0200, G??ran L??wkrantz wrote: >> I created a PR, kern/178238, on this but would like to know if anyone >> has any ideas or patches? >> >> Have updated the system where I see this to FreeBSD 9.1-STABLE #0 >> r250229 and still have the problem. > > The patch below should fix the issue for you, at least it did so in my > limited testing. > > What is does: > 1. When inactivating a nullfs vnode, check if the lower vnode is > unlinked, and reclaim upper vnode if so. [This fixes your case]. > > 2. Besides a callback to the upper filesystems for the lower vnode > reclaimation, it also calls the upper fs for lower vnode unlink. > This allows nullfs to purge cached vnodes for the unlinked lower. > [This fixes an opposite case, when the vnode is removed from the > lower mount, but upper aliases prevent the vnode from being > recycled]. > > 3. Fix a wart which existed from the introduction of the nullfs caching, > do not unlock lower vnode in the nullfs_reclaim_lowervp(). It should > be completely innocent, but now it is also formally safe. > > 4. Fix vnode reference leak in nullfs_reclaim_lowervp(). > > Please note that the patch is basically not tested, I only verified your > scenario and a mirror of it as described in item 2. > > diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h > index 4f37020..a624be6 100644 > --- a/sys/fs/nullfs/null.h > +++ b/sys/fs/nullfs/null.h > @@ -53,8 +53,11 @@ struct null_node { > LIST_ENTRY(null_node) null_hash; /* Hash list */ > struct vnode *null_lowervp; /* VREFed once */ > struct vnode *null_vnode; /* Back pointer */ > + u_int null_flags; > }; > > +#define NULLV_NOUNLOCK 0x0001 > + > #define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data)) > #define VTONULL(vp) ((struct null_node *)(vp)->v_data) > #define NULLTOV(xp) ((xp)->null_vnode) > diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c > index 02932bd..544c358 100644 > --- a/sys/fs/nullfs/null_vfsops.c > +++ b/sys/fs/nullfs/null_vfsops.c > @@ -65,7 +65,6 @@ static vfs_statfs_t nullfs_statfs; > static vfs_unmount_t nullfs_unmount; > static vfs_vget_t nullfs_vget; > static vfs_extattrctl_t nullfs_extattrctl; > -static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp; > > /* > * Mount null layer > @@ -391,8 +390,22 @@ nullfs_reclaim_lowervp(struct mount *mp, struct > vnode *lowervp) vp = null_hashget(mp, lowervp); > if (vp == NULL) > return; > + VTONULL(vp)->null_flags |= NULLV_NOUNLOCK; > vgone(vp); > - vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY); > + vput(vp); > +} > + > +static void > +nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp) > +{ > + struct vnode *vp; > + > + vp = null_hashget(mp, lowervp); > + if (vp == NULL || vp->v_usecount > 1) > + return; > + VTONULL(vp)->null_flags |= NULLV_NOUNLOCK; > + vgone(vp); > + vput(vp); > } > > static struct vfsops null_vfsops = { > @@ -408,6 +421,7 @@ static struct vfsops null_vfsops = { > .vfs_unmount = nullfs_unmount, > .vfs_vget = nullfs_vget, > .vfs_reclaim_lowervp = nullfs_reclaim_lowervp, > + .vfs_unlink_lowervp = nullfs_unlink_lowervp, > }; > > VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL); > diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c > index f59865f..3c41124 100644 > --- a/sys/fs/nullfs/null_vnops.c > +++ b/sys/fs/nullfs/null_vnops.c > @@ -692,18 +692,21 @@ null_unlock(struct vop_unlock_args *ap) > static int > null_inactive(struct vop_inactive_args *ap __unused) > { > - struct vnode *vp; > + struct vnode *vp, *lvp; > struct mount *mp; > struct null_mount *xmp; > > vp = ap->a_vp; > + lvp = NULLVPTOLOWERVP(vp); > mp = vp->v_mount; > xmp = MOUNTTONULLMOUNT(mp); > - if ((xmp->nullm_flags & NULLM_CACHE) == 0) { > + if ((xmp->nullm_flags & NULLM_CACHE) == 0 || > + (lvp->v_vflag & VV_NOSYNC) != 0) { > /* > * If this is the last reference and caching of the > - * nullfs vnodes is not enabled, then free up the > - * vnode so as not to tie up the lower vnodes. > + * nullfs vnodes is not enabled, or the lower vnode is > + * deleted, then free up the vnode so as not to tie up > + * the lower vnodes. > */ > vp->v_object = NULL; > vrecycle(vp); > @@ -748,7 +751,10 @@ null_reclaim(struct vop_reclaim_args *ap) > */ > if (vp->v_writecount > 0) > VOP_ADD_WRITECOUNT(lowervp, -1); > - vput(lowervp); > + if ((xp->null_flags & NULLV_NOUNLOCK) != 0) > + vunref(lowervp); > + else > + vput(lowervp); > free(xp, M_NULLFSNODE); > > return (0); > diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c > index de118f7..988a114 100644 > --- a/sys/kern/vfs_subr.c > +++ b/sys/kern/vfs_subr.c > @@ -2700,19 +2700,20 @@ vgone(struct vnode *vp) > } > > static void > -vgonel_reclaim_lowervp_vfs(struct mount *mp __unused, > +notify_lowervp_vfs_dummy(struct mount *mp __unused, > struct vnode *lowervp __unused) > { > } > > /* > - * Notify upper mounts about reclaimed vnode. > + * Notify upper mounts about reclaimed or unlinked vnode. > */ > -static void > -vgonel_reclaim_lowervp(struct vnode *vp) > +void > +vfs_notify_upper(struct vnode *vp, int event) > { > static struct vfsops vgonel_vfsops = { > - .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs > + .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, > + .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, > }; > struct mount *mp, *ump, *mmp; > > @@ -2736,7 +2737,17 @@ vgonel_reclaim_lowervp(struct vnode *vp) > } > TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); > MNT_IUNLOCK(mp); > - VFS_RECLAIM_LOWERVP(ump, vp); > + switch (event) { > + case VFS_NOTIFY_UPPER_RECLAIM: > + VFS_RECLAIM_LOWERVP(ump, vp); > + break; > + case VFS_NOTIFY_UPPER_UNLINK: > + VFS_UNLINK_LOWERVP(ump, vp); > + break; > + default: > + KASSERT(0, ("invalid event %d", event)); > + break; > + } > MNT_ILOCK(mp); > ump = TAILQ_NEXT(mmp, mnt_upper_link); > TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); > @@ -2783,7 +2794,7 @@ vgonel(struct vnode *vp) > active = vp->v_usecount; > oweinact = (vp->v_iflag & VI_OWEINACT); > VI_UNLOCK(vp); > - vgonel_reclaim_lowervp(vp); > + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); > > /* > * Clean out any buffers associated with the vnode. > diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c > index 29cb7bd..a004ea0 100644 > --- a/sys/kern/vfs_syscalls.c > +++ b/sys/kern/vfs_syscalls.c > @@ -1846,6 +1846,7 @@ restart: > if (error) > goto out; > #endif > + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); > error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); > #ifdef MAC > out: > @@ -3825,6 +3826,7 @@ restart: > return (error); > goto restart; > } > + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); > error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); > vn_finished_write(mp); > out: > diff --git a/sys/sys/mount.h b/sys/sys/mount.h > index a9c86f6..a953dae 100644 > --- a/sys/sys/mount.h > +++ b/sys/sys/mount.h > @@ -608,7 +608,7 @@ typedef int vfs_mount_t(struct mount *mp); > typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, > struct sysctl_req *req); > typedef void vfs_susp_clean_t(struct mount *mp); > -typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode > *lowervp); +typedef void vfs_notify_lowervp_t(struct mount *mp, struct > vnode *lowervp); > struct vfsops { > vfs_mount_t *vfs_mount; > @@ -626,7 +626,8 @@ struct vfsops { > vfs_extattrctl_t *vfs_extattrctl; > vfs_sysctl_t *vfs_sysctl; > vfs_susp_clean_t *vfs_susp_clean; > - vfs_reclaim_lowervp_t *vfs_reclaim_lowervp; > + vfs_notify_lowervp_t *vfs_reclaim_lowervp; > + vfs_notify_lowervp_t *vfs_unlink_lowervp; > }; > > vfs_statfs_t __vfs_statfs; > @@ -747,6 +748,14 @@ vfs_statfs_t __vfs_statfs; > } \ > } while (0) > > +#define VFS_UNLINK_LOWERVP(MP, VP) do { \ > + if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \ > + VFS_PROLOGUE(MP); \ > + (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \ > + VFS_EPILOGUE(MP); \ > + } \ > +} while (0) > + > #define VFS_KNOTE_LOCKED(vp, hint) do \ > { \ > if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ > @@ -759,6 +768,9 @@ vfs_statfs_t __vfs_statfs; > VN_KNOTE((vp), (hint), 0); \ > } while (0) > > +#define VFS_NOTIFY_UPPER_RECLAIM 1 > +#define VFS_NOTIFY_UPPER_UNLINK 2 > + > #include <sys/module.h> > > /* > @@ -840,6 +852,7 @@ int vfs_modevent(module_t, int, void *); > void vfs_mount_error(struct mount *, const char *, ...); > void vfs_mountroot(void); /* mount our root filesystem */ > void vfs_mountedfrom(struct mount *, const char *from); > +void vfs_notify_upper(struct vnode *, int); > void vfs_oexport_conv(const struct oexport_args *oexp, > struct export_args *exp); > void vfs_ref(struct mount *); I assume this is CURRENT? Tried on STABLE but got this: cc1: warnings being treated as errors /usr/src/sys/kern/vfs_subr.c: In function 'vfs_notify_upper': /usr/src/sys/kern/vfs_subr.c:2801: warning: implicit declaration of function 'VFS_PROLOGUE' /usr/src/sys/kern/vfs_subr.c:2801: warning: nested extern declaration of 'VFS_PROLOGUE' [-Wnested-externs] /usr/src/sys/kern/vfs_subr.c:2801: warning: implicit declaration of function 'VFS_EPILOGUE' /usr/src/sys/kern/vfs_subr.c:2801: warning: nested extern declaration of 'VFS_EPILOGUE' [-Wnested-externs] *** [vfs_subr.o] Error code 1 /glz
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?BFD5A40BC867D1DA2AAEFF16>