Date: Sat, 11 May 2013 11:17:44 +0000 (UTC) From: Konstantin Belousov <kib@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r250505 - in head/sys: fs/nullfs kern sys Message-ID: <201305111117.r4BBHidt052772@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib Date: Sat May 11 11:17:44 2013 New Revision: 250505 URL: http://svnweb.freebsd.org/changeset/base/250505 Log: - Fix nullfs vnode reference leak in nullfs_reclaim_lowervp(). The null_hashget() obtains the reference on the nullfs vnode, which must be dropped. - Fix a wart which existed from the introduction of the nullfs caching, do not unlock lower vnode in the nullfs_reclaim_lowervp(). It should be innocent, but now it is also formally safe. Inform the nullfs_reclaim() about this using the NULLV_NOUNLOCK flag set on nullfs inode. - Add a callback to the upper filesystems for the lower vnode unlinking. When inactivating a nullfs vnode, check if the lower vnode was unlinked, indicated by nullfs flag NULLV_DROP or VV_NOSYNC on the lower vnode, and reclaim upper vnode if so. This allows nullfs to purge cached vnodes for the unlinked lower vnode, avoiding excessive caching. Reported by: G??ran L??wkrantz <goran.lowkrantz@ismobile.com> Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Modified: head/sys/fs/nullfs/null.h head/sys/fs/nullfs/null_subr.c head/sys/fs/nullfs/null_vfsops.c head/sys/fs/nullfs/null_vnops.c head/sys/kern/vfs_subr.c head/sys/kern/vfs_syscalls.c head/sys/sys/mount.h Modified: head/sys/fs/nullfs/null.h ============================================================================== --- head/sys/fs/nullfs/null.h Sat May 11 10:51:32 2013 (r250504) +++ head/sys/fs/nullfs/null.h Sat May 11 11:17:44 2013 (r250505) @@ -53,8 +53,12 @@ struct null_node { LIST_ENTRY(null_node) null_hash; /* Hash list */ struct vnode *null_lowervp; /* VREFed once */ struct vnode *null_vnode; /* Back pointer */ + u_int null_flags; }; +#define NULLV_NOUNLOCK 0x0001 +#define NULLV_DROP 0x0002 + #define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data)) #define VTONULL(vp) ((struct null_node *)(vp)->v_data) #define NULLTOV(xp) ((xp)->null_vnode) Modified: head/sys/fs/nullfs/null_subr.c ============================================================================== --- head/sys/fs/nullfs/null_subr.c Sat May 11 10:51:32 2013 (r250504) +++ head/sys/fs/nullfs/null_subr.c Sat May 11 11:17:44 2013 (r250505) @@ -247,6 +247,7 @@ null_nodeget(mp, lowervp, vpp) xp->null_vnode = vp; xp->null_lowervp = lowervp; + xp->null_flags = 0; vp->v_type = lowervp->v_type; vp->v_data = xp; vp->v_vnlock = lowervp->v_vnlock; Modified: head/sys/fs/nullfs/null_vfsops.c ============================================================================== --- head/sys/fs/nullfs/null_vfsops.c Sat May 11 10:51:32 2013 (r250504) +++ head/sys/fs/nullfs/null_vfsops.c Sat May 11 11:17:44 2013 (r250505) @@ -65,7 +65,6 @@ static vfs_statfs_t nullfs_statfs; static vfs_unmount_t nullfs_unmount; static vfs_vget_t nullfs_vget; static vfs_extattrctl_t nullfs_extattrctl; -static vfs_reclaim_lowervp_t nullfs_reclaim_lowervp; /* * Mount null layer @@ -391,8 +390,37 @@ nullfs_reclaim_lowervp(struct mount *mp, vp = null_hashget(mp, lowervp); if (vp == NULL) return; + VTONULL(vp)->null_flags |= NULLV_NOUNLOCK; vgone(vp); - vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY); + vput(vp); +} + +static void +nullfs_unlink_lowervp(struct mount *mp, struct vnode *lowervp) +{ + struct vnode *vp; + struct null_node *xp; + + vp = null_hashget(mp, lowervp); + if (vp == NULL) + return; + xp = VTONULL(vp); + xp->null_flags |= NULLV_DROP | NULLV_NOUNLOCK; + vhold(vp); + vunref(vp); + + /* + * If vunref() dropped the last use reference on the nullfs + * vnode, it must be reclaimed, and its lock was split from + * the lower vnode lock. Need to do extra unlock before + * allowing the final vdrop() to free the vnode. + */ + if (vp->v_usecount == 0) { + KASSERT((vp->v_iflag & VI_DOOMED) != 0, + ("not reclaimed %p", vp)); + VOP_UNLOCK(vp, 0); + } + vdrop(vp); } static struct vfsops null_vfsops = { @@ -408,6 +436,7 @@ static struct vfsops null_vfsops = { .vfs_unmount = nullfs_unmount, .vfs_vget = nullfs_vget, .vfs_reclaim_lowervp = nullfs_reclaim_lowervp, + .vfs_unlink_lowervp = nullfs_unlink_lowervp, }; VFS_SET(null_vfsops, nullfs, VFCF_LOOPBACK | VFCF_JAIL); Modified: head/sys/fs/nullfs/null_vnops.c ============================================================================== --- head/sys/fs/nullfs/null_vnops.c Sat May 11 10:51:32 2013 (r250504) +++ head/sys/fs/nullfs/null_vnops.c Sat May 11 11:17:44 2013 (r250505) @@ -692,18 +692,24 @@ null_unlock(struct vop_unlock_args *ap) static int null_inactive(struct vop_inactive_args *ap __unused) { - struct vnode *vp; + struct vnode *vp, *lvp; + struct null_node *xp; struct mount *mp; struct null_mount *xmp; vp = ap->a_vp; + xp = VTONULL(vp); + lvp = NULLVPTOLOWERVP(vp); mp = vp->v_mount; xmp = MOUNTTONULLMOUNT(mp); - if ((xmp->nullm_flags & NULLM_CACHE) == 0) { + if ((xmp->nullm_flags & NULLM_CACHE) == 0 || + (xp->null_flags & NULLV_DROP) != 0 || + (lvp->v_vflag & VV_NOSYNC) != 0) { /* * If this is the last reference and caching of the - * nullfs vnodes is not enabled, then free up the - * vnode so as not to tie up the lower vnodes. + * nullfs vnodes is not enabled, or the lower vnode is + * deleted, then free up the vnode so as not to tie up + * the lower vnodes. */ vp->v_object = NULL; vrecycle(vp); @@ -748,7 +754,10 @@ null_reclaim(struct vop_reclaim_args *ap */ if (vp->v_writecount > 0) VOP_ADD_WRITECOUNT(lowervp, -1); - vput(lowervp); + if ((xp->null_flags & NULLV_NOUNLOCK) != 0) + vunref(lowervp); + else + vput(lowervp); free(xp, M_NULLFSNODE); return (0); Modified: head/sys/kern/vfs_subr.c ============================================================================== --- head/sys/kern/vfs_subr.c Sat May 11 10:51:32 2013 (r250504) +++ head/sys/kern/vfs_subr.c Sat May 11 11:17:44 2013 (r250505) @@ -2700,19 +2700,20 @@ vgone(struct vnode *vp) } static void -vgonel_reclaim_lowervp_vfs(struct mount *mp __unused, +notify_lowervp_vfs_dummy(struct mount *mp __unused, struct vnode *lowervp __unused) { } /* - * Notify upper mounts about reclaimed vnode. + * Notify upper mounts about reclaimed or unlinked vnode. */ -static void -vgonel_reclaim_lowervp(struct vnode *vp) +void +vfs_notify_upper(struct vnode *vp, int event) { static struct vfsops vgonel_vfsops = { - .vfs_reclaim_lowervp = vgonel_reclaim_lowervp_vfs + .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, + .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, }; struct mount *mp, *ump, *mmp; @@ -2736,7 +2737,17 @@ vgonel_reclaim_lowervp(struct vnode *vp) } TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); MNT_IUNLOCK(mp); - VFS_RECLAIM_LOWERVP(ump, vp); + switch (event) { + case VFS_NOTIFY_UPPER_RECLAIM: + VFS_RECLAIM_LOWERVP(ump, vp); + break; + case VFS_NOTIFY_UPPER_UNLINK: + VFS_UNLINK_LOWERVP(ump, vp); + break; + default: + KASSERT(0, ("invalid event %d", event)); + break; + } MNT_ILOCK(mp); ump = TAILQ_NEXT(mmp, mnt_upper_link); TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); @@ -2783,7 +2794,7 @@ vgonel(struct vnode *vp) active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); - vgonel_reclaim_lowervp(vp); + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM); /* * Clean out any buffers associated with the vnode. Modified: head/sys/kern/vfs_syscalls.c ============================================================================== --- head/sys/kern/vfs_syscalls.c Sat May 11 10:51:32 2013 (r250504) +++ head/sys/kern/vfs_syscalls.c Sat May 11 11:17:44 2013 (r250505) @@ -1846,6 +1846,7 @@ restart: if (error) goto out; #endif + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: @@ -3825,6 +3826,7 @@ restart: return (error); goto restart; } + vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: Modified: head/sys/sys/mount.h ============================================================================== --- head/sys/sys/mount.h Sat May 11 10:51:32 2013 (r250504) +++ head/sys/sys/mount.h Sat May 11 11:17:44 2013 (r250505) @@ -608,7 +608,7 @@ typedef int vfs_mount_t(struct mount *mp typedef int vfs_sysctl_t(struct mount *mp, fsctlop_t op, struct sysctl_req *req); typedef void vfs_susp_clean_t(struct mount *mp); -typedef void vfs_reclaim_lowervp_t(struct mount *mp, struct vnode *lowervp); +typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp); struct vfsops { vfs_mount_t *vfs_mount; @@ -626,7 +626,8 @@ struct vfsops { vfs_extattrctl_t *vfs_extattrctl; vfs_sysctl_t *vfs_sysctl; vfs_susp_clean_t *vfs_susp_clean; - vfs_reclaim_lowervp_t *vfs_reclaim_lowervp; + vfs_notify_lowervp_t *vfs_reclaim_lowervp; + vfs_notify_lowervp_t *vfs_unlink_lowervp; }; vfs_statfs_t __vfs_statfs; @@ -747,6 +748,14 @@ vfs_statfs_t __vfs_statfs; } \ } while (0) +#define VFS_UNLINK_LOWERVP(MP, VP) do { \ + if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \ + VFS_PROLOGUE(MP); \ + (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \ + VFS_EPILOGUE(MP); \ + } \ +} while (0) + #define VFS_KNOTE_LOCKED(vp, hint) do \ { \ if (((vp)->v_vflag & VV_NOKNOTE) == 0) \ @@ -759,6 +768,9 @@ vfs_statfs_t __vfs_statfs; VN_KNOTE((vp), (hint), 0); \ } while (0) +#define VFS_NOTIFY_UPPER_RECLAIM 1 +#define VFS_NOTIFY_UPPER_UNLINK 2 + #include <sys/module.h> /* @@ -840,6 +852,7 @@ int vfs_modevent(module_t, int, void *); void vfs_mount_error(struct mount *, const char *, ...); void vfs_mountroot(void); /* mount our root filesystem */ void vfs_mountedfrom(struct mount *, const char *from); +void vfs_notify_upper(struct vnode *, int); void vfs_oexport_conv(const struct oexport_args *oexp, struct export_args *exp); void vfs_ref(struct mount *);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201305111117.r4BBHidt052772>