Date: Sat, 26 Aug 2017 18:18:27 +0200
From: Daniel Roethlisberger <daniel@roe.ch>
To: freebsd-hackers@freebsd.org
Subject: [PATCH] O_NOATIME support for open(2)
Message-ID: <20170826161827.GA21456@schoggimuss.roe.ch>
next in thread | raw e-mail | index | archive | help
--45Z9DzgjV8m4Oswq
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I'm trying to implement O_NOATIME support for open(2) in order to provide a more elegant way for backup/archiving software to prevent atime clobbering. Except for a 2008 thread on this list, I did not find any material, so I am not sure whether anybody is interested in this or whether there are reasons why it was never implemented.

The attached patch against 11.1 implements O_NOATIME support for open(2); it prevents read(2) and mmap(2) from clobbering atime if the file descriptor was opened with O_NOATIME. O_NOATIME is only permitted for root and the owner of the file. Currently it is only implemented for ufs/ffs. It seems to work for me but has not been extensively tested.

I am interested in feedback from people who know their way around I/O and VFS code before I extend this to other file systems, make O_NOATIME tunable by fcntl(2), wire it to the Linux compat layer and write docs. Does the implementation look sane? Did I miss something important? Specifically, is there a better way to pass O_NOATIME into vm_mmap_vnode() than adding an additional boolean_t argument? I did not use an additional mmap flag because that would have required additional logic to prevent userland from passing the flag to the mmap syscall.
Daniel -- Daniel Roethlisberger http://daniel.roe.ch/ --45Z9DzgjV8m4Oswq Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="onoatime-v2.diff" diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 3138dda..2f75b2b 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -317,6 +317,8 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, } if (fmode & FREAD) accmode |= VREAD; + if ((fmode & FNOATIME) && (fmode & FREAD)) + accmode |= VADMIN; if (fmode & FEXEC) accmode |= VEXEC; if ((fmode & O_APPEND) && (fmode & FWRITE)) @@ -798,6 +800,8 @@ vn_read(fp, uio, active_cred, flags, td) ioflag |= IO_NDELAY; if (fp->f_flag & O_DIRECT) ioflag |= IO_DIRECT; + if (fp->f_flag & FNOATIME) + ioflag |= IO_NOATIME; advice = get_advice(fp, uio); vn_lock(vp, LK_SHARED | LK_RETRY); @@ -2398,6 +2402,7 @@ vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_object_t object; vm_prot_t maxprot; boolean_t writecounted; + boolean_t noatime; int error; #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ @@ -2470,8 +2475,9 @@ vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, foff < 0 || foff > OFF_MAX - size) return (EINVAL); + noatime = fp->f_flag & FNOATIME; writecounted = FALSE; - error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp, + error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp, noatime, &foff, &object, &writecounted); if (error != 0) return (error); diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index d1d0062..fbd2931 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -133,6 +133,11 @@ typedef __pid_t pid_t; #define O_VERIFY 0x00200000 /* open only after verification */ #endif +#define O_NOATIME 0x00400000 /* do not update atime */ +#ifdef _KERNEL +#define FNOATIME O_NOATIME +#endif + /* * XXX missing O_DSYNC, O_RSYNC. */ @@ -150,7 +155,7 @@ typedef __pid_t pid_t; #define OFLAGS(fflags) ((fflags) & O_EXEC ? 
(fflags) : (fflags) - 1) /* bits to save after open */ -#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC) +#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC|FNOATIME) /* bits settable by fcntl(F_SETFL, ...) */ #define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT) diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index dedbec6..28e0923 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -302,6 +302,7 @@ struct vattr { #define IO_INVAL 0x0040 /* invalidate after I/O */ #define IO_SYNC 0x0080 /* do I/O synchronously */ #define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */ +#define IO_NOATIME 0x0200 /* do not update atime */ #define IO_EXT 0x0400 /* operate on external attributes */ #define IO_NORMAL 0x0800 /* operate on regular data */ #define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */ diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index b1de1b8..3067d2e 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -672,6 +672,7 @@ ffs_read(ap) if ((error == 0 || uio->uio_resid != orig_resid) && (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 && + (ioflag & IO_NOATIME) == 0 && (ip->i_flag & IN_ACCESS) == 0) { VI_LOCK(vp); ip->i_flag |= IN_ACCESS; diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index c37973d..57503f9 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -94,7 +94,7 @@ int vm_mmap_to_errno(int rv); int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, struct cdev *, struct cdevsw *, vm_ooffset_t *, vm_object_t *); int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *, - struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *); + struct vnode *, boolean_t, vm_ooffset_t *, vm_object_t *, boolean_t *); void vm_set_page_size(void); void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t); typedef int (*pmap_pinit_t)(struct pmap *pmap); diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 
d0f14f3..d1d5cab 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1191,7 +1191,7 @@ kern_munlock(struct thread *td, uintptr_t addr0, size_t size) int vm_mmap_vnode(struct thread *td, vm_size_t objsize, vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp, - struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp, + struct vnode *vp, boolean_t noatime, vm_ooffset_t *foffp, vm_object_t *objp, boolean_t *writecounted) { struct vattr va; @@ -1283,7 +1283,8 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize, *objp = obj; *flagsp = flags; - vfs_mark_atime(vp, cred); + if (!noatime) + vfs_mark_atime(vp, cred); done: if (error != 0 && *writecounted) { @@ -1400,7 +1401,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, } case OBJT_VNODE: error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, - handle, &foff, &object, &writecounted); + handle, FALSE, &foff, &object, &writecounted); break; case OBJT_DEFAULT: if (handle == NULL) { --45Z9DzgjV8m4Oswq--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20170826161827.GA21456>