Date: Fri, 23 Jan 2015 12:29:36 +0200 From: Konstantin Belousov <kostikbel@gmail.com> To: Ronald Klop <ronald-lists@klop.ws> Cc: freebsd-stable@freebsd.org Subject: Re: mmap on tmpfs not updating mtime Message-ID: <20150123102936.GF42409@kib.kiev.ua> In-Reply-To: <20150123084645.GE42409@kib.kiev.ua> References: <op.xsu80gb9kndu52@ronaldradial.radialsg.local> <20150123084645.GE42409@kib.kiev.ua>
next in thread | previous in thread | raw e-mail | index | archive | help
On Fri, Jan 23, 2015 at 10:46:45AM +0200, Konstantin Belousov wrote: > The detection of modifications could be done, e.g. by utilizing syncer > to make a pass over all active vnodes and converting OBJ_MIGHTBEDIRTY > flag for the backing vm object into mtime update. Of course, VM must be > modified to also set the flag or its analog (OBJT_MIGHTBEDIRTYTMPFS ?) > for tmpfs vm objects. This is what I mean above as 'relatively hard'. > > I will review (and commit) the patch along these lines. Ok, it is even more complicated due to the interaction with the fast path in the fault handler, which ignored calls to vm_object_set_writeable_dirty() for tmpfs vnodes. Below is the prototype. It is on par with e.g. UFS, which only updates mtime for writes through mapped regions when the syncer finds such dirty pages. diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h index 445bf61..b077489 100644 --- a/sys/fs/tmpfs/tmpfs.h +++ b/sys/fs/tmpfs/tmpfs.h @@ -398,6 +398,7 @@ int tmpfs_alloc_vp(struct mount *, struct tmpfs_node *, int, void tmpfs_free_vp(struct vnode *); int tmpfs_alloc_file(struct vnode *, struct vnode **, struct vattr *, struct componentname *, char *); +void tmpfs_check_mtime(struct vnode *); void tmpfs_dir_attach(struct vnode *, struct tmpfs_dirent *); void tmpfs_dir_detach(struct vnode *, struct tmpfs_dirent *); void tmpfs_dir_destroy(struct tmpfs_mount *, struct tmpfs_node *); diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index d0d9a15..c1930f1 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1415,6 +1415,31 @@ retry: return (0); } +void +tmpfs_check_mtime(struct vnode *vp) +{ + struct tmpfs_node *node; + struct vm_object *obj; + + ASSERT_VOP_ELOCKED(vp, "check_mtime"); + if (vp->v_type != VREG) + return; + node = VP_TO_TMPFS_NODE(vp); + obj = vp->v_object; + KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) == + (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj")); + /* unlocked read */ + if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) 
{ + VM_OBJECT_WLOCK(obj); + if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) { + obj->flags &= ~OBJ_TMPFS_DIRTY; + node = VP_TO_TMPFS_NODE(vp); + node->tn_status |= TMPFS_NODE_MODIFIED; + } + VM_OBJECT_WUNLOCK(obj); + } +} + /* * Change flags of the given vnode. * Caller should execute tmpfs_update on vp after a successful execution. diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c index f389f1c..2942e5a 100644 --- a/sys/fs/tmpfs/tmpfs_vfsops.c +++ b/sys/fs/tmpfs/tmpfs_vfsops.c @@ -33,10 +33,10 @@ /* * Efficient memory file system. * - * tmpfs is a file system that uses NetBSD's virtual memory sub-system - * (the well-known UVM) to store file data and metadata in an efficient - * way. This means that it does not follow the structure of an on-disk - * file system because it simply does not need to. Instead, it uses + * tmpfs is a file system that uses FreeBSD's virtual memory + * sub-system to store file data and metadata in an efficient way. + * This means that it does not follow the structure of an on-disk file + * system because it simply does not need to. Instead, it uses * memory-specific data structures and algorithms to automatically * allocate and release resources. */ @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/jail.h> #include <sys/kernel.h> +#include <sys/rwlock.h> #include <sys/stat.h> #include <sys/systm.h> #include <sys/sysctl.h> @@ -418,11 +419,45 @@ tmpfs_statfs(struct mount *mp, struct statfs *sbp) static int tmpfs_sync(struct mount *mp, int waitfor) { + struct vnode *vp, *mvp; + struct vm_object *obj; if (waitfor == MNT_SUSPEND) { MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED; MNT_IUNLOCK(mp); + } else if (waitfor == MNT_LAZY) { + /* + * Handle lazy updates of mtime from writes to mmaped + * regions. Use MNT_VNODE_FOREACH_ALL instead of + * MNT_VNODE_FOREACH_ACTIVE, since unmap of the + * tmpfs-backed vnode does not call vinactive(), due + * to vm object type is OBJT_SWAP. 
+ */ + MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { + if (vp->v_type != VREG) { + VI_UNLOCK(vp); + continue; + } + obj = vp->v_object; + KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) == + (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj")); + + /* + * Unlocked read, avoid taking vnode lock if + * not needed. Lost update will be handled on + * the next call. + */ + if ((obj->flags & OBJ_TMPFS_DIRTY) == 0) { + VI_UNLOCK(vp); + continue; + } + if (vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, + curthread) != 0) + continue; + tmpfs_check_mtime(vp); + vput(vp); + } } return (0); } diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c index c811b9a..65c5f82 100644 --- a/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -505,6 +505,7 @@ tmpfs_fsync(struct vop_fsync_args *v) MPASS(VOP_ISLOCKED(vp)); + tmpfs_check_mtime(vp); tmpfs_update(vp); return 0; @@ -1222,16 +1223,16 @@ tmpfs_readlink(struct vop_readlink_args *v) static int tmpfs_inactive(struct vop_inactive_args *v) { - struct vnode *vp = v->a_vp; - + struct vnode *vp; struct tmpfs_node *node; + vp = v->a_vp; node = VP_TO_TMPFS_NODE(vp); - if (node->tn_links == 0) vrecycle(vp); - - return 0; + else + tmpfs_check_mtime(vp); + return (0); } int diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index e62410b..71c6d84 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -358,11 +358,13 @@ RetryFault:; (fault_flags & (VM_FAULT_CHANGE_WIRING | VM_FAULT_DIRTY)) == 0 && /* avoid calling vm_object_set_writeable_dirty() */ ((prot & VM_PROT_WRITE) == 0 || - fs.first_object->type != OBJT_VNODE || + (fs.first_object->type != OBJT_VNODE && + (fs.first_object->flags & OBJ_TMPFS_NODE) == 0) || (fs.first_object->flags & OBJ_MIGHTBEDIRTY) != 0)) { VM_OBJECT_RLOCK(fs.first_object); if ((prot & VM_PROT_WRITE) != 0 && - fs.first_object->type == OBJT_VNODE && + (fs.first_object->type == OBJT_VNODE || + (fs.first_object->flags & OBJ_TMPFS_NODE) != 0) && (fs.first_object->flags & OBJ_MIGHTBEDIRTY) == 0) goto 
fast_failed; m = vm_page_lookup(fs.first_object, fs.first_pindex); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 21c15dc..63127c0 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2199,8 +2199,13 @@ vm_object_set_writeable_dirty(vm_object_t object) { VM_OBJECT_ASSERT_WLOCKED(object); - if (object->type != OBJT_VNODE) + if (object->type != OBJT_VNODE) { + if ((object->flags & OBJ_TMPFS_NODE) != 0) { + KASSERT(object->type == OBJT_SWAP, ("non-swap tmpfs")); + vm_object_set_flag(object, OBJ_TMPFS_DIRTY); + } return; + } object->generation++; if ((object->flags & OBJ_MIGHTBEDIRTY) != 0) return; diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index ab3c7d3..d80b1d8 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -187,6 +187,7 @@ struct vm_object { #define OBJ_PIPWNT 0x0040 /* paging in progress wanted */ #define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty, only for vnode */ #define OBJ_TMPFS_NODE 0x0200 /* object belongs to tmpfs VREG node */ +#define OBJ_TMPFS_DIRTY 0x0400 /* dirty tmpfs obj */ #define OBJ_COLORED 0x1000 /* pg_color is defined */ #define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */ #define OBJ_DISCONNECTWNT 0x4000 /* disconnect from vnode wanted */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20150123102936.GF42409>