Date:      Sun, 2 Nov 2008 10:15:42 +0000 (UTC)
From:      Attilio Rao <attilio@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r184554 - in head/sys: geom/journal gnu/fs/ext2fs kern nfsclient sys ufs/ffs ufs/ufs
Message-ID: <200811021015.mA2AFgim036178@svn.freebsd.org>
Author: attilio
Date: Sun Nov  2 10:15:42 2008
New Revision: 184554

URL: http://svn.freebsd.org/changeset/base/184554

Log:
  Improve VFS locking:
  - Implement real draining for VFS consumers by not relying on the
    mnt_lock lockmgr lock and using a refcount instead in order to keep
    track of lock requesters.
  - Due to the change above, remove the mnt_lock lockmgr lock, as it is
    now useless.
  - Due to the change above, vfs_busy() is no longer tied to a lockmgr
    lock.  Change its KPI by removing the interlock argument and defining
    2 new flags for it: MBF_NOWAIT, which basically replaces the LK_NOWAIT
    of the old version (already handled independently of the lockmgr), and
    MBF_MNTLSTLOCK, which provides the ability to drop the mountlist_mtx
    once the mnt interlock is held (an ability still desired by most
    consumers).
  - The change above makes the stub in vfs_mount_destroy(), which allowed
    the mnt_ref wait to be overridden after running for more than 3
    seconds, totally useless.  Remove it, as it was only thought to be
    needed for older versions.  If a problem of a held refcount never
    going away should appear, we will need to fix it properly rather than
    rely on such a hackish solution.
  - Fix a bug where returning (with an error) from dounmount() still left
    the MNTK_MWAIT flag set even though the waiters had actually been
    woken up.  Only the instance in vfs_mount_destroy() is left alone,
    because it is going to recycle the structure in any case, so it
    doesn't matter.
  - Remove the markercnt refcount as it is useless.

  This patch modifies the VFS ABI and breaks the KPI for vfs_busy(), so
  manpages and __FreeBSD_version will be modified accordingly.

  Discussed with:	kib
  Tested by:	pho

Modified:
  head/sys/geom/journal/g_journal.c
  head/sys/gnu/fs/ext2fs/ext2_vfsops.c
  head/sys/kern/vfs_lookup.c
  head/sys/kern/vfs_mount.c
  head/sys/kern/vfs_subr.c
  head/sys/kern/vfs_syscalls.c
  head/sys/kern/vfs_vnops.c
  head/sys/nfsclient/nfs_socket.c
  head/sys/nfsclient/nfs_vfsops.c
  head/sys/sys/mount.h
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ufs/ufs_vfsops.c

Modified: head/sys/geom/journal/g_journal.c
==============================================================================
--- head/sys/geom/journal/g_journal.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/geom/journal/g_journal.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -2879,7 +2879,7 @@ g_journal_do_switch(struct g_class *clas
 		desc = g_journal_find_desc(mp->mnt_stat.f_fstypename);
 		if (desc == NULL)
 			continue;
-		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx))
+		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
 			continue;
 		/* mtx_unlock(&mountlist_mtx) was done inside vfs_busy() */

Modified: head/sys/gnu/fs/ext2fs/ext2_vfsops.c
==============================================================================
--- head/sys/gnu/fs/ext2fs/ext2_vfsops.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/gnu/fs/ext2fs/ext2_vfsops.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -171,7 +171,7 @@ ext2_mount(mp, td)
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
-			if (vfs_busy(mp, LK_NOWAIT, 0))
+			if (vfs_busy(mp, MBF_NOWAIT))
 				return (EBUSY);
 			error = ext2_flushfiles(mp, flags, td);
 			vfs_unbusy(mp);

Modified: head/sys/kern/vfs_lookup.c
==============================================================================
--- head/sys/kern/vfs_lookup.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/kern/vfs_lookup.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -684,7 +684,7 @@ unionlookup:
 		 */
 		while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
 		       (cnp->cn_flags & NOCROSSMOUNT) == 0) {
-			if (vfs_busy(mp, 0, 0))
+			if (vfs_busy(mp, 0))
 				continue;
 			vput(dp);
 			VFS_UNLOCK_GIANT(vfslocked);
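[Editorial note] The conversion of vfs_busy() consumers shown in the hunks above (and in the remaining hunks below) is mechanical. As an illustrative sketch only, not part of this commit, the common "walk the mountlist" pattern looks as follows under the new KPI; the function name walk_mounts_example is made up for the example, while vfs_busy(), vfs_unbusy(), the MBF_* flags, mountlist and mountlist_mtx are the real kernel symbols touched by this change.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mount.h>
#include <sys/mutex.h>

static void
walk_mounts_example(void)
{
	struct mount *mp, *nmp;

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/*
		 * Old KPI: vfs_busy(mp, LK_NOWAIT, &mountlist_mtx).
		 * New KPI: no interlock argument; MBF_MNTLSTLOCK asks
		 * vfs_busy() to drop mountlist_mtx once the mount
		 * interlock is held, and MBF_NOWAIT makes it return
		 * ENOENT instead of sleeping when an unmount is in
		 * progress (mountlist_mtx is then still held).
		 */
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		/* ... operate on the busied mount point here ... */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
}

For a single, already-referenced mount the conversion is simply vfs_busy(mp, MBF_NOWAIT) or vfs_busy(mp, 0) in place of the old three-argument calls, as the ext2fs, NFS and UFS hunks show.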
Modified: head/sys/kern/vfs_mount.c
==============================================================================
--- head/sys/kern/vfs_mount.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/kern/vfs_mount.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -450,7 +450,6 @@ mount_init(void *mem, int size, int flag
 
 	mp = (struct mount *)mem;
 	mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
-	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
 	lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
 	return (0);
 }
@@ -462,7 +461,6 @@ mount_fini(void *mem, int size)
 
 	mp = (struct mount *)mem;
 	lockdestroy(&mp->mnt_explock);
-	lockdestroy(&mp->mnt_lock);
 	mtx_destroy(&mp->mnt_mtx);
 }
 
@@ -481,7 +479,7 @@ vfs_mount_alloc(struct vnode *vp, struct
 	TAILQ_INIT(&mp->mnt_nvnodelist);
 	mp->mnt_nvnodelistsize = 0;
 	mp->mnt_ref = 0;
-	(void) vfs_busy(mp, LK_NOWAIT, 0);
+	(void) vfs_busy(mp, MBF_NOWAIT);
 	mp->mnt_op = vfsp->vfc_vfsops;
 	mp->mnt_vfc = vfsp;
 	vfsp->vfc_refcount++;	/* XXX Unlocked */
@@ -507,19 +505,10 @@ vfs_mount_alloc(struct vnode *vp, struct
 void
 vfs_mount_destroy(struct mount *mp)
 {
-	int i;
 
 	MNT_ILOCK(mp);
-	for (i = 0; mp->mnt_ref && i < 3; i++)
-		msleep(mp, MNT_MTX(mp), PVFS, "mntref", hz);
-	/*
-	 * This will always cause a 3 second delay in rebooting due to
-	 * refs on the root mountpoint that never go away.  Most of these
-	 * are held by init which never exits.
-	 */
-	if (i == 3 && (!rebooting || bootverbose))
-		printf("Mount point %s had %d dangling refs\n",
-		    mp->mnt_stat.f_mntonname, mp->mnt_ref);
+	while (mp->mnt_ref)
+		msleep(mp, MNT_MTX(mp), PVFS, "mntref", 0);
 	if (mp->mnt_holdcnt != 0) {
 		printf("Waiting for mount point to be unheld\n");
 		while (mp->mnt_holdcnt != 0) {
@@ -928,7 +917,7 @@ vfs_domount(
 			vput(vp);
 			return (error);
 		}
-		if (vfs_busy(mp, LK_NOWAIT, 0)) {
+		if (vfs_busy(mp, MBF_NOWAIT)) {
 			vput(vp);
 			return (EBUSY);
 		}
@@ -1245,19 +1234,31 @@ dounmount(mp, flags, td)
 	/* Allow filesystems to detect that a forced unmount is in progress. */
 	if (flags & MNT_FORCE)
 		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
-	error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK |
-	    ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), MNT_MTX(mp));
-	if (error) {
-		MNT_ILOCK(mp);
-		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
-		    MNTK_UNMOUNTF);
-		if (mp->mnt_kern_flag & MNTK_MWAIT)
-			wakeup(mp);
-		MNT_IUNLOCK(mp);
-		if (coveredvp)
-			VOP_UNLOCK(coveredvp, 0);
-		return (error);
+	error = 0;
+	if (mp->mnt_lockref) {
+		if ((flags & MNT_FORCE) == 0) {
+			mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_NOINSMNTQ |
+			    MNTK_UNMOUNTF);
+			if (mp->mnt_kern_flag & MNTK_MWAIT) {
+				mp->mnt_kern_flag &= ~MNTK_MWAIT;
+				wakeup(mp);
+			}
+			MNT_IUNLOCK(mp);
+			if (coveredvp)
+				VOP_UNLOCK(coveredvp, 0);
+			return (EBUSY);
+		}
+		mp->mnt_kern_flag |= MNTK_DRAINING;
+		error = msleep(&mp->mnt_lockref, MNT_MTX(mp), PVFS,
+		    "mount drain", 0);
 	}
+	MNT_IUNLOCK(mp);
+	KASSERT(mp->mnt_lockref == 0,
+	    ("%s: invalid lock refcount in the drain path @ %s:%d",
+	    __func__, __FILE__, __LINE__));
+	KASSERT(error == 0,
+	    ("%s: invalid return value for msleep in the drain path @ %s:%d",
+	    __func__, __FILE__, __LINE__));
 
 	vn_start_write(NULL, &mp, V_WAIT);
 	if (mp->mnt_flag & MNT_EXPUBLIC)
@@ -1321,9 +1322,10 @@ dounmount(mp, flags, td)
 		mp->mnt_flag |= async_flag;
 		if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0)
 			mp->mnt_kern_flag |= MNTK_ASYNC;
-		lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
-		if (mp->mnt_kern_flag & MNTK_MWAIT)
+		if (mp->mnt_kern_flag & MNTK_MWAIT) {
+			mp->mnt_kern_flag &= ~MNTK_MWAIT;
 			wakeup(mp);
+		}
 		MNT_IUNLOCK(mp);
 		if (coveredvp)
 			VOP_UNLOCK(coveredvp, 0);
@@ -1337,7 +1339,6 @@ dounmount(mp, flags, td)
 		vput(coveredvp);
 	}
 	vfs_event_signal(NULL, VQ_UNMOUNT, 0);
-	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
 	vfs_mount_destroy(mp);
 	return (0);
 }
@@ -2070,7 +2071,6 @@ __mnt_vnode_first(struct vnode **mvp, st
 			wakeup(&mp->mnt_holdcnt);
 		return (NULL);
 	}
-	mp->mnt_markercnt++;
 	(*mvp)->v_mount = mp;
 	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
 	return (vp);
@@ -2093,7 +2093,6 @@ __mnt_vnode_markerfree(struct vnode **mv
 	MNT_ILOCK(mp);
 	*mvp = NULL;
-	mp->mnt_markercnt--;
 	mp->mnt_holdcnt--;
 	if (mp->mnt_holdcnt == 0 && mp->mnt_holdcntwaiters != 0)
 		wakeup(&mp->mnt_holdcnt);

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/kern/vfs_subr.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -335,42 +335,36 @@ SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST,
 
 /*
  * Mark a mount point as busy. Used to synchronize access and to delay
- * unmounting. Interlock is not released on failure.
+ * unmounting. Eventually, mountlist_mtx is not released on failure.
  */
 int
-vfs_busy(struct mount *mp, int flags, struct mtx *interlkp)
+vfs_busy(struct mount *mp, int flags)
 {
-	int lkflags;
+
+	MPASS((flags & ~MBF_MASK) == 0);
 
 	MNT_ILOCK(mp);
 	MNT_REF(mp);
 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
-		if (flags & LK_NOWAIT) {
+		if (flags & MBF_NOWAIT) {
			MNT_REL(mp);
 			MNT_IUNLOCK(mp);
 			return (ENOENT);
 		}
-		if (interlkp)
-			mtx_unlock(interlkp);
+		if (flags & MBF_MNTLSTLOCK)
+			mtx_unlock(&mountlist_mtx);
 		mp->mnt_kern_flag |= MNTK_MWAIT;
-		/*
-		 * Since all busy locks are shared except the exclusive
-		 * lock granted when unmounting, the only place that a
-		 * wakeup needs to be done is at the release of the
-		 * exclusive lock at the end of dounmount.
-		 */
 		msleep(mp, MNT_MTX(mp), PVFS, "vfs_busy", 0);
 		MNT_REL(mp);
 		MNT_IUNLOCK(mp);
-		if (interlkp)
-			mtx_lock(interlkp);
+		if (flags & MBF_MNTLSTLOCK)
+			mtx_lock(&mountlist_mtx);
 		return (ENOENT);
 	}
-	if (interlkp)
-		mtx_unlock(interlkp);
-	lkflags = LK_SHARED | LK_INTERLOCK | LK_NOWAIT;
-	if (lockmgr(&mp->mnt_lock, lkflags, MNT_MTX(mp)))
-		panic("vfs_busy: unexpected lock failure");
+	if (flags & MBF_MNTLSTLOCK)
+		mtx_unlock(&mountlist_mtx);
+	mp->mnt_lockref++;
+	MNT_IUNLOCK(mp);
 	return (0);
 }
 
@@ -381,8 +375,15 @@ void
 vfs_unbusy(struct mount *mp)
 {
 
-	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
-	vfs_rel(mp);
+	MNT_ILOCK(mp);
+	MNT_REL(mp);
+	mp->mnt_lockref--;
+	if (mp->mnt_lockref == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) {
+		MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT);
+		mp->mnt_kern_flag &= ~MNTK_DRAINING;
+		wakeup(&mp->mnt_lockref);
+	}
+	MNT_IUNLOCK(mp);
 }
 
 /*
@@ -747,7 +748,7 @@ vnlru_proc(void)
 		mtx_lock(&mountlist_mtx);
 		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 			int vfsunlocked;
-			if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx)) {
+			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
 				nmp = TAILQ_NEXT(mp, mnt_list);
 				continue;
 			}
@@ -2837,7 +2838,6 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 	db_printf("    mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen);
 	db_printf("    mnt_iosize_max = %d\n", mp->mnt_iosize_max);
 	db_printf("    mnt_hashseed = %u\n", mp->mnt_hashseed);
-	db_printf("    mnt_markercnt = %d\n", mp->mnt_markercnt);
 	db_printf("    mnt_holdcnt = %d\n", mp->mnt_holdcnt);
 	db_printf("    mnt_holdcntwaiters = %d\n", mp->mnt_holdcntwaiters);
 	db_printf("    mnt_secondary_writes = %d\n", mp->mnt_secondary_writes);
@@ -2999,7 +2999,7 @@ sysctl_vnode(SYSCTL_HANDLER_ARGS)
 	n = 0;
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
-		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx))
+		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
 			continue;
 		MNT_ILOCK(mp);
 		TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
@@ -3361,7 +3361,7 @@ sync_fsync(struct vop_fsync_args *ap)
 	 * not already on the sync list.
 	 */
 	mtx_lock(&mountlist_mtx);
-	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_mtx) != 0) {
+	if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
 		mtx_unlock(&mountlist_mtx);
 		return (0);
 	}

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/kern/vfs_syscalls.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -124,7 +124,7 @@ sync(td, uap)
 
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
-		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx)) {
+		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
@@ -197,7 +197,7 @@ quotactl(td, uap)
 	vfslocked = NDHASGIANT(&nd);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	mp = nd.ni_vp->v_mount;
-	if ((error = vfs_busy(mp, 0, NULL))) {
+	if ((error = vfs_busy(mp, 0))) {
 		vrele(nd.ni_vp);
 		VFS_UNLOCK_GIANT(vfslocked);
 		return (error);
 	}
@@ -479,7 +479,7 @@ kern_getfsstat(struct thread *td, struct
 			continue;
 		}
 #endif
-		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx)) {
+		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
@@ -741,7 +741,7 @@ fchdir(td, uap)
 	error = change_dir(vp, td);
 	while (!error && (mp = vp->v_mountedhere) != NULL) {
 		int tvfslocked;
-		if (vfs_busy(mp, 0, 0))
+		if (vfs_busy(mp, 0))
 			continue;
 		tvfslocked = VFS_LOCK_GIANT(mp);
 		error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td);

Modified: head/sys/kern/vfs_vnops.c
==============================================================================
--- head/sys/kern/vfs_vnops.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/kern/vfs_vnops.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -957,9 +957,18 @@ vn_start_write(vp, mpp, flags)
 	}
 	if ((mp = *mpp) == NULL)
 		return (0);
+
+	/*
+	 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
+	 * a vfs_ref().
+	 * As long as a vnode is not provided we need to acquire a
+	 * refcount for the provided mountpoint too, in order to
+	 * emulate a vfs_ref().
+	 */
 	MNT_ILOCK(mp);
 	if (vp == NULL)
 		MNT_REF(mp);
+
 	/*
 	 * Check on status of suspension.
 	 */
@@ -1021,6 +1030,14 @@ vn_start_secondary_write(vp, mpp, flags)
 	 */
 	if ((mp = *mpp) == NULL)
 		return (0);
+
+	/*
+	 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
+	 * a vfs_ref().
+	 * As long as a vnode is not provided we need to acquire a
+	 * refcount for the provided mountpoint too, in order to
+	 * emulate a vfs_ref().
+	 */
 	MNT_ILOCK(mp);
 	if (vp == NULL)
 		MNT_REF(mp);

Modified: head/sys/nfsclient/nfs_socket.c
==============================================================================
--- head/sys/nfsclient/nfs_socket.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/nfsclient/nfs_socket.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -1429,8 +1429,8 @@ nfs_timer(void *arg)
 		/*
 		 * Terminate request if force-unmount in progress.
 		 * Note that NFS could have vfs_busy'ed the mount,
-		 * causing the unmount to wait for the mnt_lock, making
-		 * this bit of logic necessary.
+		 * causing the unmount to wait and making this bit
+		 * of logic necessary.
 		 */
 		if (rep->r_nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) {
 			nfs_softterm(rep);

Modified: head/sys/nfsclient/nfs_vfsops.c
==============================================================================
--- head/sys/nfsclient/nfs_vfsops.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/nfsclient/nfs_vfsops.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -258,7 +258,7 @@ nfs_statfs(struct mount *mp, struct stat
 #ifndef nolint
 	sfp = NULL;
 #endif
-	error = vfs_busy(mp, LK_NOWAIT, NULL);
+	error = vfs_busy(mp, MBF_NOWAIT);
 	if (error)
 		return (error);
 	error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);

Modified: head/sys/sys/mount.h
==============================================================================
--- head/sys/sys/mount.h	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/sys/mount.h	Sun Nov  2 10:15:42 2008	(r184554)
@@ -142,13 +142,11 @@ struct vfsopt;
  * Lock reference:
  *	m - mountlist_mtx
  *	i - interlock
- *	l - mnt_lock
  *
  * Unmarked fields are considered stable as long as a ref is held.
  *
  */
 struct mount {
-	struct lock	mnt_lock;		/* mount structure lock */
 	struct mtx	mnt_mtx;		/* mount structure interlock */
 	int		mnt_gen;		/* struct mount generation */
 #define	mnt_startzero	mnt_list
@@ -175,7 +173,7 @@ struct mount {
 	struct netexport *mnt_export;		/* export list */
 	struct label	*mnt_label;		/* MAC label for the fs */
 	u_int		mnt_hashseed;		/* Random seed for vfs_hash */
-	int		mnt_markercnt;		/* marker vnodes in use */
+	int		mnt_lockref;		/* (i) Lock reference count */
 	int		mnt_holdcnt;		/* hold count */
 	int		mnt_holdcntwaiters;	/* waits on hold count */
 	int		mnt_secondary_writes;   /* (i) # of secondary writes */
@@ -319,6 +317,7 @@ void          __mnt_vnode_markerfree(str
 #define MNTK_ASYNC	0x00000002	/* filtered async flag */
 #define MNTK_SOFTDEP	0x00000004	/* async disabled by softdep */
 #define MNTK_NOINSMNTQ	0x00000008	/* insmntque is not allowed */
+#define MNTK_DRAINING	0x00000010	/* lock draining is happening */
 #define MNTK_UNMOUNT	0x01000000	/* unmount in progress */
 #define MNTK_MWAIT	0x02000000	/* waiting for unmount to finish */
 #define MNTK_SUSPEND	0x08000000	/* request write suspension */
@@ -506,6 +505,13 @@ struct uio;
 
 #ifdef _KERNEL
 
+/*
+ * vfs_busy specific flags and mask.
+ */
+#define	MBF_NOWAIT	0x01
+#define	MBF_MNTLSTLOCK	0x02
+#define	MBF_MASK	(MBF_NOWAIT | MBF_MNTLSTLOCK)
+
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_MOUNT);
 #endif
@@ -686,7 +692,7 @@ int	vfs_scanopt(struct vfsoptlist *opts,
 int	vfs_setpublicfs			    /* set publicly exported fs */
 	    (struct mount *, struct netexport *, struct export_args *);
 void	vfs_msync(struct mount *, int);
-int	vfs_busy(struct mount *, int, struct mtx *);
+int	vfs_busy(struct mount *, int);
 int	vfs_export			 /* process mount export info */
 	    (struct mount *, struct export_args *);
 int	vfs_allocate_syncvnode(struct mount *);

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/ufs/ffs/ffs_softdep.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -745,7 +745,7 @@ softdep_flush(void)
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			if ((mp->mnt_flag & MNT_SOFTDEP) == 0)
 				continue;
-			if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx))
+			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
 				continue;
 			vfslocked = VFS_LOCK_GIANT(mp);
 			softdep_process_worklist(mp, 0);

Modified: head/sys/ufs/ufs/ufs_vfsops.c
==============================================================================
--- head/sys/ufs/ufs/ufs_vfsops.c	Sun Nov  2 06:40:33 2008	(r184553)
+++ head/sys/ufs/ufs/ufs_vfsops.c	Sun Nov  2 10:15:42 2008	(r184554)
@@ -118,7 +118,7 @@ ufs_quotactl(mp, cmds, id, arg, td)
 
 	if ((u_int)type >= MAXQUOTAS)
 		return (EINVAL);
-	if (vfs_busy(mp, LK_NOWAIT, 0))
+	if (vfs_busy(mp, MBF_NOWAIT))
 		return (0);
 	switch (cmd) {
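[Editorial note] The heart of the change is the drain handshake between dounmount() and vfs_unbusy() in the vfs_mount.c and vfs_subr.c hunks above. The following standalone userspace model is an illustration only, not code from the tree: pthread primitives stand in for the mount interlock and for msleep()/wakeup(), and the struct and function names are invented for the example.

#include <pthread.h>
#include <stdbool.h>

/* Models the subset of struct mount used by the new draining scheme. */
struct mount_model {
	pthread_mutex_t	interlock;	/* stands in for MNT_ILOCK/MNT_IUNLOCK */
	pthread_cond_t	drain_cv;	/* stands in for msleep/wakeup on &mnt_lockref */
	int		lockref;	/* models mnt_lockref */
	bool		draining;	/* models MNTK_DRAINING */
	bool		unmounting;	/* models MNTK_UNMOUNT */
};

/* vfs_busy(mp, MBF_NOWAIT) analogue: register as a lock requester. */
static int
busy_model(struct mount_model *mp)
{
	pthread_mutex_lock(&mp->interlock);
	if (mp->unmounting) {
		pthread_mutex_unlock(&mp->interlock);
		return (-1);		/* ENOENT in the real KPI */
	}
	mp->lockref++;
	pthread_mutex_unlock(&mp->interlock);
	return (0);
}

/* vfs_unbusy() analogue: drop the reference and wake a draining unmount. */
static void
unbusy_model(struct mount_model *mp)
{
	pthread_mutex_lock(&mp->interlock);
	mp->lockref--;
	if (mp->lockref == 0 && mp->draining) {
		mp->draining = false;
		pthread_cond_signal(&mp->drain_cv);
	}
	pthread_mutex_unlock(&mp->interlock);
}

/* dounmount() analogue: block new requesters, then wait for the last one. */
static void
drain_model(struct mount_model *mp)
{
	pthread_mutex_lock(&mp->interlock);
	mp->unmounting = true;
	while (mp->lockref != 0) {
		mp->draining = true;
		pthread_cond_wait(&mp->drain_cv, &mp->interlock);
	}
	pthread_mutex_unlock(&mp->interlock);
}

The kernel code performs a single msleep() rather than looping, since only one unmount can be draining a given mount at a time and the KASSERT afterwards checks that the refcount really reached zero; the while loop above is just the usual condition-variable idiom.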