Skip site navigation (1) | Skip section navigation (2)
Date:      Sat, 26 Aug 2017 18:18:27 +0200
From:      Daniel Roethlisberger <daniel@roe.ch>
To:        freebsd-hackers@freebsd.org
Subject:   [PATCH] O_NOATIME support for open(2)
Message-ID:  <20170826161827.GA21456@schoggimuss.roe.ch>

next in thread | raw e-mail | index | archive | help

--45Z9DzgjV8m4Oswq
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

I'm trying to implement O_NOATIME support for open(2) in order to
provide a more elegant way for backup/archiving software to
prevent atime clobbering.  Apart from a 2008 thread on this list,
I did not find any material, so I am not sure whether anybody is
interested in this or whether there are reasons why it was never
implemented.

The attached patch against 11.1 implements O_NOATIME support for
open(2); it prevents read(2) and mmap(2) from clobbering atime if
the file descriptor was opened with O_NOATIME.  O_NOATIME is only
permitted for root and the owner of the file.  Currently it is
only implemented for ufs/ffs.  It seems to work for me but has
not been extensively tested.

I am interested in feedback from people who know their way around
I/O and VFS code before I extend this to other file systems, make
O_NOATIME tunable by fcntl(2), wire it to the Linux compat layer
and write docs.  Does the implementation look sane?  Did I miss
something important?

Specifically, is there a better way to pass O_NOATIME into
vm_mmap_vnode() than adding an additional boolean_t argument?
I did not use an additional mmap flag because that would have
required additional logic to prevent userland from passing the
flag to the mmap(2) syscall.

Daniel

-- 
Daniel Roethlisberger
http://daniel.roe.ch/


--45Z9DzgjV8m4Oswq
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="onoatime-v2.diff"

diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 3138dda..2f75b2b 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -317,6 +317,8 @@ vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred,
 	}
 	if (fmode & FREAD)
 		accmode |= VREAD;
+	if ((fmode & FNOATIME) && (fmode & FREAD))
+		accmode |= VADMIN;
 	if (fmode & FEXEC)
 		accmode |= VEXEC;
 	if ((fmode & O_APPEND) && (fmode & FWRITE))
@@ -798,6 +800,8 @@ vn_read(fp, uio, active_cred, flags, td)
 		ioflag |= IO_NDELAY;
 	if (fp->f_flag & O_DIRECT)
 		ioflag |= IO_DIRECT;
+	if (fp->f_flag & FNOATIME)
+		ioflag |= IO_NOATIME;
 	advice = get_advice(fp, uio);
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 
@@ -2398,6 +2402,7 @@ vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
 	vm_object_t object;
 	vm_prot_t maxprot;
 	boolean_t writecounted;
+	boolean_t noatime;
 	int error;
 
 #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
@@ -2470,8 +2475,9 @@ vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
 	    foff < 0 || foff > OFF_MAX - size)
 		return (EINVAL);
 
+	noatime = fp->f_flag & FNOATIME;
 	writecounted = FALSE;
-	error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp,
+	error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp, noatime,
 	    &foff, &object, &writecounted);
 	if (error != 0)
 		return (error);
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index d1d0062..fbd2931 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -133,6 +133,11 @@ typedef	__pid_t		pid_t;
 #define	O_VERIFY	0x00200000	/* open only after verification */
 #endif
 
+#define O_NOATIME	0x00400000	/* do not update atime */
+#ifdef _KERNEL
+#define FNOATIME	O_NOATIME
+#endif
+
 /*
  * XXX missing O_DSYNC, O_RSYNC.
  */
@@ -150,7 +155,7 @@ typedef	__pid_t		pid_t;
 #define	OFLAGS(fflags)	((fflags) & O_EXEC ? (fflags) : (fflags) - 1)
 
 /* bits to save after open */
-#define	FMASK	(FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC)
+#define	FMASK	(FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC|FNOATIME)
 /* bits settable by fcntl(F_SETFL, ...) */
 #define	FCNTLFLAGS	(FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
 
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index dedbec6..28e0923 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -302,6 +302,7 @@ struct vattr {
 #define	IO_INVAL	0x0040		/* invalidate after I/O */
 #define	IO_SYNC		0x0080		/* do I/O synchronously */
 #define	IO_DIRECT	0x0100		/* attempt to bypass buffer cache */
+#define	IO_NOATIME	0x0200		/* do not update atime */
 #define	IO_EXT		0x0400		/* operate on external attributes */
 #define	IO_NORMAL	0x0800		/* operate on regular data */
 #define	IO_NOMACCHECK	0x1000		/* MAC checks unnecessary */
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index b1de1b8..3067d2e 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -672,6 +672,7 @@ ffs_read(ap)
 
 	if ((error == 0 || uio->uio_resid != orig_resid) &&
 	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0 &&
+	    (ioflag & IO_NOATIME) == 0 &&
 	    (ip->i_flag & IN_ACCESS) == 0) {
 		VI_LOCK(vp);
 		ip->i_flag |= IN_ACCESS;
diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h
index c37973d..57503f9 100644
--- a/sys/vm/vm_extern.h
+++ b/sys/vm/vm_extern.h
@@ -94,7 +94,7 @@ int vm_mmap_to_errno(int rv);
 int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
     int *, struct cdev *, struct cdevsw *, vm_ooffset_t *, vm_object_t *);
 int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *,
-    struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
+    struct vnode *, boolean_t, vm_ooffset_t *, vm_object_t *, boolean_t *);
 void vm_set_page_size(void);
 void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t);
 typedef int (*pmap_pinit_t)(struct pmap *pmap);
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index d0f14f3..d1d5cab 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1191,7 +1191,7 @@ kern_munlock(struct thread *td, uintptr_t addr0, size_t size)
 int
 vm_mmap_vnode(struct thread *td, vm_size_t objsize,
     vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
-    struct vnode *vp, vm_ooffset_t *foffp, vm_object_t *objp,
+    struct vnode *vp, boolean_t noatime, vm_ooffset_t *foffp, vm_object_t *objp,
     boolean_t *writecounted)
 {
 	struct vattr va;
@@ -1283,7 +1283,8 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize,
 	*objp = obj;
 	*flagsp = flags;
 
-	vfs_mark_atime(vp, cred);
+	if (!noatime)
+		vfs_mark_atime(vp, cred);
 
 done:
 	if (error != 0 && *writecounted) {
@@ -1400,7 +1401,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
 	}
 	case OBJT_VNODE:
 		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
-		    handle, &foff, &object, &writecounted);
+		    handle, FALSE, &foff, &object, &writecounted);
 		break;
 	case OBJT_DEFAULT:
 		if (handle == NULL) {

--45Z9DzgjV8m4Oswq--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20170826161827.GA21456>