Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 5 Jun 2013 10:52:56 +0400
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        Jilles Tjoelker <jilles@stack.nl>
Cc:        Konstantin Belousov <kostikbel@gmail.com>, arch@FreeBSD.org
Subject:   Re: aio_mlock(2) system call
Message-ID:  <20130605065256.GZ67170@glebius.int.ru>
In-Reply-To: <20130604212917.GA72412@stack.nl>
References:  <20130603100618.GH67170@FreeBSD.org> <20130603161255.GM3047@kib.kiev.ua> <20130604113035.GV67170@glebius.int.ru> <20130604212917.GA72412@stack.nl>

next in thread | previous in thread | raw e-mail | index | archive | help

--2nTeH+t2PBomgucg
Content-Type: text/plain; charset=koi8-r
Content-Disposition: inline

  Jilles,

On Tue, Jun 04, 2013 at 11:29:17PM +0200, Jilles Tjoelker wrote:
...
J> This should probably be in alphabetical order.
...
J> Man pages should not use contractions.

Fixed.

J> > [snip]
J> > Index: sys/sys/aio.h
J> > ===================================================================
J> > --- sys/sys/aio.h	(revision 251369)
J> > +++ sys/sys/aio.h	(working copy)
J> > @@ -38,6 +38,7 @@
J> >  #ifdef _KERNEL
J> >  #define	LIO_SYNC		0x3
J> >  #endif
J> > +#define	LIO_MLOCK		0x4
J> 
J> Is it intended that the new constant is available to userland, such as
J> for use in lio_listio(2)?

Hmm, I didn't intend such usage and didn't test it. You are right, I'd better
hide the constant.

Updated patch attached.

-- 
Totus tuus, Glebius.

--2nTeH+t2PBomgucg
Content-Type: text/x-diff; charset=koi8-r
Content-Disposition: attachment; filename="aio_mlock.diff"

Index: lib/libc/sys/Makefile.inc
===================================================================
--- lib/libc/sys/Makefile.inc	(revision 251369)
+++ lib/libc/sys/Makefile.inc	(working copy)
@@ -85,6 +85,7 @@ MAN+=	abort2.2 \
 	adjtime.2 \
 	aio_cancel.2 \
 	aio_error.2 \
+	aio_mlock.2 \
 	aio_read.2 \
 	aio_return.2 \
 	aio_suspend.2 \
Index: lib/libc/sys/Symbol.map
===================================================================
--- lib/libc/sys/Symbol.map	(revision 251369)
+++ lib/libc/sys/Symbol.map	(working copy)
@@ -379,6 +379,7 @@ FBSD_1.2 {
 
 FBSD_1.3 {
 	accept4;
+	aio_mlock;
 	bindat;
 	cap_fcntls_get;
 	cap_fcntls_limit;
Index: lib/libc/sys/aio_mlock.2
===================================================================
--- lib/libc/sys/aio_mlock.2	(revision 0)
+++ lib/libc/sys/aio_mlock.2	(working copy)
@@ -0,0 +1,133 @@
+.\" Copyright (c) 2013 Gleb Smirnoff <glebius@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 3, 2013
+.Dt AIO_MLOCK 2
+.Os
+.Sh NAME
+.Nm aio_mlock
+.Nd asynchronous
+.Xr mlock 2
+operation
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In aio.h
+.Ft int
+.Fn aio_mlock "struct aiocb *iocb"
+.Sh DESCRIPTION
+The
+.Fn aio_mlock
+system call allows the calling process to lock into memory the
+physical pages associated with the virtual address range starting at
+.Fa iocb->aio_buf
+for
+.Fa iocb->aio_nbytes
+bytes.
+The call returns immediately after the locking request has
+been enqueued; the operation may or may not have completed at the time
+the call returns.
+.Pp
+The
+.Fa iocb
+pointer may be subsequently used as an argument to
+.Fn aio_return
+and
+.Fn aio_error
+in order to determine return or error status for the enqueued operation
+while it is in progress.
+.Pp
+If the request could not be enqueued (generally due to
+.Xr aio 4
+limits),
+then the call returns without having enqueued the request.
+.Sh RESTRICTIONS
+The Asynchronous I/O Control Block structure pointed to by
+.Fa iocb
+and the buffer that the
+.Fa iocb->aio_buf
+member of that structure references must remain valid until the
+operation has completed.
+For this reason, use of auto (stack) variables
+for these objects is discouraged.
+.Pp
+The asynchronous I/O control buffer
+.Fa iocb
+should be zeroed before the
+.Fn aio_mlock
+call to avoid passing bogus context information to the kernel.
+.Pp
+Modifications of the Asynchronous I/O Control Block structure or the
+buffer contents after the request has been enqueued, but before the
+request has completed, are not allowed.
+.Sh RETURN VALUES
+.Rv -std aio_mlock
+.Sh ERRORS
+The
+.Fn aio_mlock
+system call will fail if:
+.Bl -tag -width Er
+.It Bq Er EAGAIN
+The request was not queued because of system resource limitations.
+.It Bq Er ENOSYS
+The
+.Fn aio_mlock
+system call is not supported.
+.El
+.Pp
+If the request is successfully enqueued, but subsequently cancelled
+or an error occurs, the value returned by the
+.Fn aio_return
+system call is per the
+.Xr mlock 2
+system call, and the value returned by the
+.Fn aio_error
+system call is one of the error returns from the
+.Xr mlock 2
+system call, or
+.Er ECANCELED
+if the request was explicitly cancelled via a call to
+.Fn aio_cancel .
+.Sh SEE ALSO
+.Xr aio_cancel 2 ,
+.Xr aio_error 2 ,
+.Xr aio_return 2 ,
+.Xr aio 4 ,
+.Xr mlock 2
+.Sh PORTABILITY
+The
+.Fn aio_mlock
+system call is a
+.Fx
+extension, and should not be used in portable code.
+.Sh HISTORY
+The
+.Fn aio_mlock
+system call first appeared in
+.Fx 10.0 .
+.Sh AUTHORS
+The system call was introduced by
+.An Gleb Smirnoff Aq glebius@FreeBSD.org .

Property changes on: lib/libc/sys/aio_mlock.2
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: sys/compat/freebsd32/syscalls.master
===================================================================
--- sys/compat/freebsd32/syscalls.master	(revision 251369)
+++ sys/compat/freebsd32/syscalls.master	(working copy)
@@ -1044,3 +1044,5 @@
 				    __socklen_t * __restrict anamelen, \
 				    int flags); }
 542	AUE_PIPE	NOPROTO	{ int pipe2(int *fildes, int flags); }
+543	AUE_NULL	NOSTD	{ int freebsd32_aio_mlock( \
+				    struct aiocb32 *aiocbp); }
Index: sys/kern/syscalls.master
===================================================================
--- sys/kern/syscalls.master	(revision 251369)
+++ sys/kern/syscalls.master	(working copy)
@@ -977,5 +977,6 @@
 				    __socklen_t * __restrict anamelen, \
 				    int flags); }
 542	AUE_PIPE	STD	{ int pipe2(int *fildes, int flags); }
+543	AUE_NULL	NOSTD	{ int aio_mlock(struct aiocb *aiocbp); }
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master
Index: sys/kern/vfs_aio.c
===================================================================
--- sys/kern/vfs_aio.c	(revision 251369)
+++ sys/kern/vfs_aio.c	(working copy)
@@ -338,7 +338,9 @@ static struct unrhdr *aiod_unr;
 void		aio_init_aioinfo(struct proc *p);
 static int	aio_onceonly(void);
 static int	aio_free_entry(struct aiocblist *aiocbe);
-static void	aio_process(struct aiocblist *aiocbe);
+static void	aio_process_rw(struct aiocblist *aiocbe);
+static void	aio_process_sync(struct aiocblist *aiocbe);
+static void	aio_process_mlock(struct aiocblist *aiocbe);
 static int	aio_newproc(int *);
 int		aio_aqueue(struct thread *td, struct aiocb *job,
 			struct aioliojob *lio, int type, struct aiocb_ops *ops);
@@ -425,6 +427,7 @@ static struct syscall_helper_data aio_syscalls[] =
 	SYSCALL_INIT_HELPER(aio_cancel),
 	SYSCALL_INIT_HELPER(aio_error),
 	SYSCALL_INIT_HELPER(aio_fsync),
+	SYSCALL_INIT_HELPER(aio_mlock),
 	SYSCALL_INIT_HELPER(aio_read),
 	SYSCALL_INIT_HELPER(aio_return),
 	SYSCALL_INIT_HELPER(aio_suspend),
@@ -452,6 +455,7 @@ static struct syscall_helper_data aio32_syscalls[]
 	SYSCALL32_INIT_HELPER(freebsd32_aio_cancel),
 	SYSCALL32_INIT_HELPER(freebsd32_aio_error),
 	SYSCALL32_INIT_HELPER(freebsd32_aio_fsync),
+	SYSCALL32_INIT_HELPER(freebsd32_aio_mlock),
 	SYSCALL32_INIT_HELPER(freebsd32_aio_read),
 	SYSCALL32_INIT_HELPER(freebsd32_aio_write),
 	SYSCALL32_INIT_HELPER(freebsd32_aio_waitcomplete),
@@ -701,7 +705,8 @@ aio_free_entry(struct aiocblist *aiocbe)
 	 * at open time, but this is already true of file descriptors in
 	 * a multithreaded process.
 	 */
-	fdrop(aiocbe->fd_file, curthread);
+	if (aiocbe->fd_file)
+		fdrop(aiocbe->fd_file, curthread);
 	crfree(aiocbe->cred);
 	uma_zfree(aiocb_zone, aiocbe);
 	AIO_LOCK(ki);
@@ -855,15 +860,15 @@ drop:
 }
 
 /*
- * The AIO processing activity.  This is the code that does the I/O request for
- * the non-physio version of the operations.  The normal vn operations are used,
- * and this code should work in all instances for every type of file, including
- * pipes, sockets, fifos, and regular files.
+ * The AIO processing activity for LIO_READ/LIO_WRITE.  This is the code that
+ * does the I/O request for the non-physio version of the operations.  The
+ * normal vn operations are used, and this code should work in all instances
+ * for every type of file, including pipes, sockets, fifos, and regular files.
  *
  * XXX I don't think it works well for socket, pipe, and fifo.
  */
 static void
-aio_process(struct aiocblist *aiocbe)
+aio_process_rw(struct aiocblist *aiocbe)
 {
 	struct ucred *td_savedcred;
 	struct thread *td;
@@ -877,23 +882,16 @@ static void
 	int oublock_st, oublock_end;
 	int inblock_st, inblock_end;
 
+	KASSERT(aiocbe->uaiocb.aio_lio_opcode == LIO_READ ||
+	    aiocbe->uaiocb.aio_lio_opcode == LIO_WRITE,
+	    ("%s: opcode %d", __func__, aiocbe->uaiocb.aio_lio_opcode));
+
 	td = curthread;
 	td_savedcred = td->td_ucred;
 	td->td_ucred = aiocbe->cred;
 	cb = &aiocbe->uaiocb;
 	fp = aiocbe->fd_file;
 
-	if (cb->aio_lio_opcode == LIO_SYNC) {
-		error = 0;
-		cnt = 0;
-		if (fp->f_vnode != NULL)
-			error = aio_fsync_vnode(td, fp->f_vnode);
-		cb->_aiocb_private.error = error;
-		cb->_aiocb_private.status = 0;
-		td->td_ucred = td_savedcred;
-		return;
-	}
-
 	aiov.iov_base = (void *)(uintptr_t)cb->aio_buf;
 	aiov.iov_len = cb->aio_nbytes;
 
@@ -954,6 +952,41 @@ static void
 }
 
 static void
+aio_process_sync(struct aiocblist *aiocbe)
+{
+	struct thread *td = curthread;
+	struct ucred *td_savedcred = td->td_ucred;
+	struct aiocb *cb = &aiocbe->uaiocb;
+	struct file *fp = aiocbe->fd_file;
+	int error = 0;
+
+	KASSERT(aiocbe->uaiocb.aio_lio_opcode == LIO_SYNC,
+	    ("%s: opcode %d", __func__, aiocbe->uaiocb.aio_lio_opcode));
+
+	td->td_ucred = aiocbe->cred;
+	if (fp->f_vnode != NULL)
+		error = aio_fsync_vnode(td, fp->f_vnode);
+	cb->_aiocb_private.error = error;
+	cb->_aiocb_private.status = 0;
+	td->td_ucred = td_savedcred;
+}
+
+static void
+aio_process_mlock(struct aiocblist *aiocbe)
+{
+	struct aiocb *cb = &aiocbe->uaiocb;
+	int error;
+
+	KASSERT(aiocbe->uaiocb.aio_lio_opcode == LIO_MLOCK,
+	    ("%s: opcode %d", __func__, aiocbe->uaiocb.aio_lio_opcode));
+
+	error = vm_mlock(aiocbe->userproc, aiocbe->cred,
+	    __DEVOLATILE(void *, cb->aio_buf), cb->aio_nbytes);
+	cb->_aiocb_private.error = error;
+	cb->_aiocb_private.status = 0;
+}
+
+static void
 aio_bio_done_notify(struct proc *userp, struct aiocblist *aiocbe, int type)
 {
 	struct aioliojob *lj;
@@ -1024,7 +1057,7 @@ notification_done:
 }
 
 /*
- * The AIO daemon, most of the actual work is done in aio_process,
+ * The AIO daemon, most of the actual work is done in aio_process_*,
  * but the setup (and address space mgmt) is done in this routine.
  */
 static void
@@ -1121,7 +1154,18 @@ aio_daemon(void *_id)
 			ki = userp->p_aioinfo;
 
 			/* Do the I/O function. */
-			aio_process(aiocbe);
+			switch(aiocbe->uaiocb.aio_lio_opcode) {
+			case LIO_READ:
+			case LIO_WRITE:
+				aio_process_rw(aiocbe);
+				break;
+			case LIO_SYNC:
+				aio_process_sync(aiocbe);
+				break;
+			case LIO_MLOCK:
+				aio_process_mlock(aiocbe);
+				break;
+			}
 
 			mtx_lock(&aio_job_mtx);
 			/* Decrement the active job count. */
@@ -1261,7 +1305,7 @@ aio_qphysio(struct proc *p, struct aiocblist *aioc
 	cb = &aiocbe->uaiocb;
 	fp = aiocbe->fd_file;
 
-	if (fp->f_type != DTYPE_VNODE)
+	if (fp == NULL || fp->f_type != DTYPE_VNODE)
 		return (-1);
 
 	vp = fp->f_vnode;
@@ -1613,6 +1657,9 @@ aio_aqueue(struct thread *td, struct aiocb *job, s
 	case LIO_SYNC:
 		error = fget(td, fd, CAP_FSYNC, &fp);
 		break;
+	case LIO_MLOCK:
+		fp = NULL;
+		break;
 	case LIO_NOP:
 		error = fget(td, fd, CAP_NONE, &fp);
 		break;
@@ -1670,7 +1717,8 @@ aio_aqueue(struct thread *td, struct aiocb *job, s
 	error = kqfd_register(kqfd, &kev, td, 1);
 aqueue_fail:
 	if (error) {
-		fdrop(fp, td);
+		if (fp)
+			fdrop(fp, td);
 		uma_zfree(aiocb_zone, aiocbe);
 		ops->store_error(job, error);
 		goto done;
@@ -1687,7 +1735,7 @@ no_kqueue:
 	if (opcode == LIO_SYNC)
 		goto queueit;
 
-	if (fp->f_type == DTYPE_SOCKET) {
+	if (fp && fp->f_type == DTYPE_SOCKET) {
 		/*
 		 * Alternate queueing for socket ops: Reach down into the
 		 * descriptor to get the socket data.  Then check to see if the
@@ -2165,6 +2213,13 @@ sys_aio_write(struct thread *td, struct aio_write_
 	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops));
 }
 
+int
+sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap)
+{
+
+	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops));
+}
+
 static int
 kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list,
     struct aiocb **acb_list, int nent, struct sigevent *sig,
@@ -2907,6 +2962,14 @@ freebsd32_aio_write(struct thread *td, struct free
 }
 
 int
+freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap)
+{
+
+	return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK,
+	    &aiocb32_ops));
+}
+
+int
 freebsd32_aio_waitcomplete(struct thread *td,
     struct freebsd32_aio_waitcomplete_args *uap)
 {
Index: sys/sys/aio.h
===================================================================
--- sys/sys/aio.h	(revision 251369)
+++ sys/sys/aio.h	(working copy)
@@ -37,6 +37,7 @@
 #define	LIO_READ		0x2
 #ifdef _KERNEL
 #define	LIO_SYNC		0x3
+#define	LIO_MLOCK		0x4
 #endif
 
 /*
@@ -124,6 +125,11 @@ int	aio_cancel(int, struct aiocb *);
  */
 int	aio_suspend(const struct aiocb * const[], int, const struct timespec *);
 
+/*
+ * Asynchronous mlock
+ */
+int	aio_mlock(struct aiocb *);
+
 #ifdef __BSD_VISIBLE
 int	aio_waitcomplete(struct aiocb **, struct timespec *);
 #endif
Index: sys/vm/vm_extern.h
===================================================================
--- sys/vm/vm_extern.h	(revision 251369)
+++ sys/vm/vm_extern.h	(working copy)
@@ -90,5 +90,6 @@ struct sf_buf *vm_imgact_map_page(vm_object_t obje
 void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
+int vm_mlock(struct proc *, struct ucred *, const void *, size_t);
 #endif				/* _KERNEL */
 #endif				/* !_VM_EXTERN_H_ */
Index: sys/vm/vm_mmap.c
===================================================================
--- sys/vm/vm_mmap.c	(revision 251369)
+++ sys/vm/vm_mmap.c	(working copy)
@@ -1036,18 +1036,24 @@ sys_mlock(td, uap)
 	struct thread *td;
 	struct mlock_args *uap;
 {
-	struct proc *proc;
+
+	return (vm_mlock(td->td_proc, td->td_ucred, uap->addr, uap->len));
+}
+
+int
+vm_mlock(struct proc *proc, struct ucred *cred, const void *addr0, size_t len)
+{
 	vm_offset_t addr, end, last, start;
 	vm_size_t npages, size;
 	vm_map_t map;
 	unsigned long nsize;
 	int error;
 
-	error = priv_check(td, PRIV_VM_MLOCK);
+	error = priv_check_cred(cred, PRIV_VM_MLOCK, 0);
 	if (error)
 		return (error);
-	addr = (vm_offset_t)uap->addr;
-	size = uap->len;
+	addr = (vm_offset_t)addr0;
+	size = len;
 	last = addr + size;
 	start = trunc_page(addr);
 	end = round_page(last);
@@ -1056,7 +1062,6 @@ sys_mlock(td, uap)
 	npages = atop(end - start);
 	if (npages > vm_page_max_wired)
 		return (ENOMEM);
-	proc = td->td_proc;
 	map = &proc->p_vmspace->vm_map;
 	PROC_LOCK(proc);
 	nsize = ptoa(npages + pmap_wired_count(map->pmap));

--2nTeH+t2PBomgucg--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130605065256.GZ67170>