Date: Mon, 13 Jan 2003 20:11:42 -0800 (PST) From: Matthew Dillon <dillon@apollo.backplane.com> To: "Alan L. Cox" <alc@imimic.com> Cc: Peter Wemm <peter@wemm.org>, arch@FreeBSD.ORG Subject: getsysfd() patch #1 (Re: Virtual memory question) Message-ID: <200301140411.h0E4BgpN078032@apollo.backplane.com> References: <20030114002831.1C8C12A89E@canning.wemm.org> <3E2381F8.85BB90A0@imimic.com>
next in thread | previous in thread | raw e-mail | index | archive | help
This is a first-attempt workup of getsysfd(). See? I told ya it was
trivial!
This isn't everything. If we really want to do this right we need to
create a filesystem inode type to represent a memory rendezvous,
similar to how we represent a FIFO or SOCKET rendezvous. If we do that
then we can support all shm_open() situations using this new call.
I have only done a small amount of testing, and I have not double-checked
that I handle the reference counts properly. I also had to reorganize
mmap() quite a bit (in fact, it looks like someone did a bunch of
rewriting in the mmap()/vm_mmap() code, and we really need to rewrite
the layering).
Here is a test program. The patch is below this program. This should be
considered a 'test' patch for the moment; my heart isn't set on the
interface. For example, perhaps we want to add additional arguments to
make it more useful/generic.
-Matt
#include <sys/types.h>
#include <sys/sysfd.h>
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
/*
 * Exercise getsysfd(SYSFD_MEMORY).
 *
 * One 1MB memory descriptor is mapped twice, once MAP_SHARED (ptr1) and
 * once MAP_PRIVATE (ptr2), and the descriptor is closed before either
 * mapping is used.  After fork() the child prints what it sees through
 * the private mapping, then writes to both mappings.  Both processes then
 * print the shared mapping: the private write must not show up, the
 * shared write must.
 *
 * Returns 0 when the descriptor and both mappings could be created,
 * 1 otherwise.  ac and av are unused.
 */
int
main(int ac, char **av)
{
	const size_t len = 1024*1024;
	char *ptr1;
	char *ptr2;
	int fd;

	(void)ac;
	(void)av;

	/* Clear errno first so the value printed belongs to this call. */
	errno = 0;
	fd = getsysfd(SYSFD_MEMORY, len);
	printf("fd = %d %d %s\n", fd, errno, strerror(errno));
	if (fd < 0)
		return(1);

	errno = 0;
	ptr1 = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	printf("mmap: %p (%s)\n", (void *)ptr1, strerror(errno));
	errno = 0;
	ptr2 = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
	printf("mmap: %p (%s)\n", (void *)ptr2, strerror(errno));

	/* The mappings are expected to outlive the descriptor. */
	close(fd);

	/* mmap() reports failure as MAP_FAILED; do not write through it. */
	if (ptr1 == MAP_FAILED || ptr2 == MAP_FAILED)
		return(1);

	ptr1[0] = 1;
	ptr1[len-1] = 2;
	if (fork() == 0) {
		printf("CONTENTS %d %d\n", ptr2[0], ptr2[len-1]);
		ptr2[0] = 2;		/* modify private mapping */
		ptr1[len-1] = 3;	/* modify original */
	}
	/* Parent and child both fall through and report the shared view. */
	sleep(1);
	/* SHOULD BE 1 3 */
	printf("ORIGCONTENTS %d %d\n", ptr1[0], ptr1[len-1]);
	return(0);
}
Index: conf/files
===================================================================
RCS file: /home/ncvs/src/sys/conf/files,v
retrieving revision 1.744
diff -u -r1.744 files
--- conf/files 8 Jan 2003 23:36:59 -0000 1.744
+++ conf/files 14 Jan 2003 02:30:47 -0000
@@ -1055,6 +1055,7 @@
kern/subr_xxx.c standard
kern/sys_generic.c standard
kern/sys_pipe.c standard
+kern/sys_sysfd.c standard
kern/sys_process.c standard
kern/sys_socket.c standard
kern/syscalls.c optional witness
Index: kern/init_sysent.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.146
diff -u -r1.146 init_sysent.c
--- kern/init_sysent.c 8 Jan 2003 04:57:52 -0000 1.146
+++ kern/init_sysent.c 14 Jan 2003 01:58:05 -0000
@@ -2,7 +2,7 @@
* System call switch table.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/init_sysent.c,v 1.146 2003/01/08 04:57:52 davidxu Exp $
+ * $FreeBSD$
* created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp
*/
@@ -457,4 +457,5 @@
{ SYF_MPSAFE | AS(__acl_set_link_args), (sy_call_t *)__acl_set_link }, /* 426 = __acl_set_link */
{ SYF_MPSAFE | AS(__acl_delete_link_args), (sy_call_t *)__acl_delete_link }, /* 427 = __acl_delete_link */
{ SYF_MPSAFE | AS(__acl_aclcheck_link_args), (sy_call_t *)__acl_aclcheck_link }, /* 428 = __acl_aclcheck_link */
+ { SYF_MPSAFE | AS(getsysfd_args), (sy_call_t *)getsysfd }, /* 429 = getsysfd */
};
Index: kern/sys_sysfd.c
===================================================================
RCS file: kern/sys_sysfd.c
diff -N kern/sys_sysfd.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ kern/sys_sysfd.c 14 Jan 2003 03:47:53 -0000
@@ -0,0 +1,208 @@
+/*
+ * KERN/SYS_SYSFD.C
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/filio.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mac.h>
+#include <sys/mutex.h>
+#include <sys/ttycom.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/selinfo.h>
+#include <sys/signalvar.h>
+#include <sys/sysproto.h>
+#include <sys/pipe.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/uio.h>
+#include <sys/event.h>
+#include <sys/sysfd.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/uma.h>
+
+/*
+ * interfaces to the outside world
+ */
+static fo_rdwr_t memfd_read;
+static fo_rdwr_t memfd_write;
+static fo_ioctl_t memfd_ioctl;
+static fo_poll_t memfd_poll;
+static fo_stat_t memfd_stat;
+static fo_close_t memfd_close;
+
+static struct fileops memfdops = {	/* ops vector that getsysfd() stores in f_ops for SYSFD_MEMORY */
+ memfd_read, memfd_write, memfd_ioctl, memfd_poll, NULL,	/* NOTE(review): fifth slot has no handler -- confirm nothing calls it on a DTYPE_MEMFD file */
+ memfd_stat, memfd_close
+};
+
+/*
+ * The getsysfd() system call.  getsysfd(int type, off_t size)
+ *
+ *	SYSFD_MEMORY	- Return a descriptor which can be mmap()'d,
+ *			  representing anonymous, shareable swap-backed
+ *			  memory.  size is rounded up to whole pages.
+ *
+ * Returns 0 with the descriptor in td_retval[0], else an errno.
+ */
+int
+getsysfd(struct thread *td, struct getsysfd_args *uap)
+{
+	int error;
+	int fd;
+	vm_pindex_t npages;
+	struct file *fp;
+	struct filedesc *fdp;
+
+	/*
+	 * Validate the size and round it up to whole pages.
+	 * NOTE(review): confirm round_page() is safe for a 64-bit off_t.
+	 */
+	if (uap->size < 0)
+		return(EINVAL);
+	npages = round_page(uap->size) >> PAGE_SHIFT;
+
+	/*
+	 * Allocate a new descriptor.  the descriptor will be returned with a
+	 * reference associated with fd_ofiles[fd].
+	 *
+	 * XXX falloc() really should return with two references on the desc,
+	 * not one, so it can't be ripped out from under us.
+	 */
+	error = falloc(td, &fp, &fd);
+	if (error)
+		return(error);
+	fhold(fp);
+	FILE_LOCK(fp);
+	fp->f_flag = FREAD | FWRITE;
+
+	switch(uap->type) {
+	case SYSFD_MEMORY:
+		fp->f_type = DTYPE_MEMFD;
+		fp->f_data = vm_object_allocate(OBJT_DEFAULT, npages);
+		fp->f_ops = &memfdops;
+		if (fp->f_data == NULL)
+			error = ENOMEM;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	FILE_UNLOCK(fp);
+	if (error) {
+		/* Undo falloc(): take the file back out of the fd table. */
+		fdp = td->td_proc->p_fd;
+		FILEDESC_LOCK(fdp);
+		if (fdp->fd_ofiles[fd] == fp) {
+			fdp->fd_ofiles[fd] = NULL;
+			fdp->fd_ofileflags[fd] = 0;
+			fdrop(fp, td);	/* drop ofiles[] array reference */
+			if (fd < fdp->fd_freefile)
+				fdp->fd_freefile = fd;
+		}
+		FILEDESC_UNLOCK(fdp);
+	} else {
+		td->td_retval[0] = fd;
+	}
+	fdrop(fp, td);		/* drop our reference */
+	return(error);
+}
+
+/*
+ * Read hook for memory descriptors.  Nothing is transferred: every
+ * argument is ignored and the call always fails with EOPNOTSUPP.
+ */
+static int
+memfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+    int flags, struct thread *td)
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Write hook for memory descriptors: always fails with EOPNOTSUPP.
+ */
+static int
+memfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+    int flags, struct thread *td)
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Ioctl hook for memory descriptors.  No requests are implemented:
+ * every cmd is rejected with EINVAL and data is never touched.
+ *
+ * NOTE(review): ENOTTY is the usual errno for an unsupported ioctl.
+ */
+static int
+memfd_ioctl(struct file *fp, u_long cmd, void *data,
+    struct ucred *active_cred, struct thread *td)
+{
+
+	return (EINVAL);
+}
+
+/*
+ * Poll hook for memory descriptors: ignores events and returns 0.
+ */
+static int
+memfd_poll(struct file *fp, int events, struct ucred *active_cred,
+    struct thread *td)
+{
+	return (0);
+}
+
+/*
+ * Stat hook for memory descriptors: not implemented.  *ub is left
+ * untouched and the call always fails with EOPNOTSUPP.
+ *
+ * No locking is done here because nothing is read from fp.
+ */
+static int
+memfd_stat(struct file *fp, struct stat *ub, struct ucred *active_cred,
+    struct thread *td)
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Close hook: detach the VM object from fp, then release it under Giant.
+ */
+static int
+memfd_close(struct file *fp, struct thread *td)
+{
+	vm_object_t obj;
+
+	FILE_LOCK(fp);
+	obj = fp->f_data;
+	fp->f_data = NULL;
+	FILE_UNLOCK(fp);
+
+	mtx_lock(&Giant);
+	if (obj != NULL)
+		vm_object_deallocate(obj);
+	mtx_unlock(&Giant);
+	return (0);
+}
+
Index: kern/syscalls.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/syscalls.c,v
retrieving revision 1.132
diff -u -r1.132 syscalls.c
--- kern/syscalls.c 8 Jan 2003 04:57:52 -0000 1.132
+++ kern/syscalls.c 14 Jan 2003 01:58:05 -0000
@@ -2,7 +2,7 @@
* System call names.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/syscalls.c,v 1.132 2003/01/08 04:57:52 davidxu Exp $
+ * $FreeBSD$
* created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp
*/
@@ -436,4 +436,5 @@
"__acl_set_link", /* 426 = __acl_set_link */
"__acl_delete_link", /* 427 = __acl_delete_link */
"__acl_aclcheck_link", /* 428 = __acl_aclcheck_link */
+ "getsysfd", /* 429 = getsysfd */
};
Index: kern/syscalls.master
===================================================================
RCS file: /home/ncvs/src/sys/kern/syscalls.master,v
retrieving revision 1.140
diff -u -r1.140 syscalls.master
--- kern/syscalls.master 4 Jan 2003 11:41:12 -0000 1.140
+++ kern/syscalls.master 14 Jan 2003 01:58:03 -0000
@@ -621,6 +621,7 @@
acl_type_t type); }
428 MSTD BSD { int __acl_aclcheck_link(const char *path, \
acl_type_t type, struct acl *aclp); }
+429 MSTD BSD { int getsysfd(int type, off_t size); }
; Please copy any additions and changes to the following compatability tables:
; sys/ia64/ia32/syscalls.master (take a best guess)
Index: sys/file.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/file.h,v
retrieving revision 1.59
diff -u -r1.59 file.h
--- sys/file.h 13 Jan 2003 00:28:55 -0000 1.59
+++ sys/file.h 14 Jan 2003 02:04:13 -0000
@@ -62,6 +62,7 @@
#define DTYPE_FIFO 4 /* fifo (named pipe) */
#define DTYPE_KQUEUE 5 /* event queue */
#define DTYPE_CRYPTO 6 /* crypto */
+#define DTYPE_MEMFD 7 /* memory descriptor */
#ifdef _KERNEL
Index: sys/syscall.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/syscall.h,v
retrieving revision 1.130
diff -u -r1.130 syscall.h
--- sys/syscall.h 8 Jan 2003 04:57:52 -0000 1.130
+++ sys/syscall.h 14 Jan 2003 01:58:05 -0000
@@ -2,7 +2,7 @@
* System call numbers.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/sys/syscall.h,v 1.130 2003/01/08 04:57:52 davidxu Exp $
+ * $FreeBSD$
* created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp
*/
@@ -334,4 +334,5 @@
#define SYS___acl_set_link 426
#define SYS___acl_delete_link 427
#define SYS___acl_aclcheck_link 428
-#define SYS_MAXSYSCALL 429
+#define SYS_getsysfd 429
+#define SYS_MAXSYSCALL 430
Index: sys/syscall.mk
===================================================================
RCS file: /home/ncvs/src/sys/sys/syscall.mk,v
retrieving revision 1.85
diff -u -r1.85 syscall.mk
--- sys/syscall.mk 8 Jan 2003 04:57:52 -0000 1.85
+++ sys/syscall.mk 14 Jan 2003 01:58:05 -0000
@@ -1,6 +1,6 @@
# FreeBSD system call names.
# DO NOT EDIT-- this file is automatically generated.
-# $FreeBSD: src/sys/sys/syscall.mk,v 1.85 2003/01/08 04:57:52 davidxu Exp $
+# $FreeBSD$
# created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp
MIASM = \
syscall.o \
@@ -279,4 +279,5 @@
__acl_get_link.o \
__acl_set_link.o \
__acl_delete_link.o \
- __acl_aclcheck_link.o
+ __acl_aclcheck_link.o \
+ getsysfd.o
Index: sys/sysfd.h
===================================================================
RCS file: sys/sysfd.h
diff -N sys/sysfd.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ sys/sysfd.h 14 Jan 2003 04:06:19 -0000
@@ -0,0 +1,21 @@
+/*
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_SYSFD_H_
+#define _SYS_SYSFD_H_
+
+#define SYSFD_MEMORY 1		/* mmap()able anonymous memory descriptor */
+#ifdef NOTYET
+#define SYSFD_TIMER_SECS 2
+#define SYSFD_TIMER_TENS 3
+#define SYSFD_TIMER_MICRO 4
+#define SYSFD_TIMER_SYS 5
+#define SYSFD_TIMER_REAL 6
+#define SYSFD_TIMER_VIRT 7
+#endif
+#ifndef _KERNEL	/* userland prototype; the kernel's is in sys/sysproto.h */
+extern int getsysfd(int type, off_t size);
+#endif
+#endif /* _SYS_SYSFD_H_ */
+
Index: sys/sysproto.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/sysproto.h,v
retrieving revision 1.123
diff -u -r1.123 sysproto.h
--- sys/sysproto.h 8 Jan 2003 04:57:53 -0000 1.123
+++ sys/sysproto.h 14 Jan 2003 01:58:05 -0000
@@ -2,7 +2,7 @@
* System call prototypes.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/sys/sysproto.h,v 1.123 2003/01/08 04:57:53 davidxu Exp $
+ * $FreeBSD$
* created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp
*/
@@ -1223,6 +1223,10 @@
char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)];
char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)];
};
+struct getsysfd_args {
+ char type_l_[PADL_(int)]; int type; char type_r_[PADR_(int)];
+ char size_l_[PADL_(off_t)]; off_t size; char size_r_[PADR_(off_t)];
+};
int nosys(struct thread *, struct nosys_args *);
void sys_exit(struct thread *, struct sys_exit_args *);
int fork(struct thread *, struct fork_args *);
@@ -1499,6 +1503,7 @@
int __acl_set_link(struct thread *, struct __acl_set_link_args *);
int __acl_delete_link(struct thread *, struct __acl_delete_link_args *);
int __acl_aclcheck_link(struct thread *, struct __acl_aclcheck_link_args *);
+int getsysfd(struct thread *, struct getsysfd_args *);
#ifdef COMPAT_43
Index: vm/vm_extern.h
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_extern.h,v
retrieving revision 1.59
diff -u -r1.59 vm_extern.h
--- vm/vm_extern.h 24 Jul 2002 19:47:56 -0000 1.59
+++ vm/vm_extern.h 14 Jan 2003 03:12:06 -0000
@@ -80,6 +80,7 @@
void vm_forkproc(struct thread *, struct proc *, struct thread *, int);
void vm_waitproc(struct proc *);
int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, void *, vm_ooffset_t);
+int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, vm_object_t, vm_ooffset_t);
vm_offset_t vm_page_alloc_contig(vm_offset_t, vm_offset_t, vm_offset_t, vm_offset_t);
void vm_set_page_size(void);
struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t);
Index: vm/vm_mmap.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_mmap.c,v
retrieving revision 1.155
diff -u -r1.155 vm_mmap.c
--- vm/vm_mmap.c 13 Jan 2003 00:28:55 -0000 1.155
+++ vm/vm_mmap.c 14 Jan 2003 03:55:15 -0000
@@ -201,7 +201,7 @@
struct thread *td;
struct mmap_args *uap;
{
- struct file *fp = NULL;
+ struct file *fp = NULL; /* must stay NULL for MAP_ANON: tested by the "if (fp ...)" checks below */
struct vnode *vp;
vm_offset_t addr;
vm_size_t size, pageoff;
@@ -264,49 +264,101 @@
return (EINVAL);
if (addr + size < addr)
return (EINVAL);
- }
- /*
- * XXX for non-fixed mappings where no hint is provided or
- * the hint would fall in the potential heap space,
- * place it after the end of the largest possible heap.
- *
- * There should really be a pmap call to determine a reasonable
- * location.
- */
- else if (addr == 0 ||
+ } else if (addr == 0 ||
(addr >= round_page((vm_offset_t)vms->vm_taddr) &&
- addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz)))
+ addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz))) {
+ /*
+ * XXX for non-fixed mappings where no hint is provided or
+ * the hint would fall in the potential heap space,
+ * place it after the end of the largest possible heap.
+ *
+ * There should really be a pmap call to determine a reasonable
+ * location.
+ */
addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz);
+ }
mtx_lock(&Giant); /* syscall marked mp-safe but isn't */
+
+ /*
+ * Do not allow more than a certain number of vm_map_entry structures
+ * per process. Scale with the number of rforks sharing the map
+ * to make the limit reasonable for threads.
+ */
+ if (max_proc_mmap &&
+ vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) {
+ error = ENOMEM;
+ goto done;
+ }
+
+ /*
+ * Extract the file descriptor (if not an anonymous mmap)
+ */
if (flags & MAP_ANON) {
/*
* Mapping blank space is trivial.
*/
- handle = NULL;
maxprot = VM_PROT_ALL;
pos = 0;
} else {
/*
- * Mapping file, get fp for validation. Obtain vnode and make
- * sure it is of appropriate type.
- * don't let the descriptor disappear on us if we block
+ * Mapping a file descriptor. Reference the fp so it does
+ * not go away on us.
*/
if ((error = fget(td, uap->fd, &fp)) != 0)
goto done;
- if (fp->f_type != DTYPE_VNODE) {
- error = EINVAL;
- goto done;
- }
/*
- * POSIX shared-memory objects are defined to have
- * kernel persistence, and are not defined to support
- * read(2)/write(2) -- or even open(2). Thus, we can
- * use MAP_ASYNC to trade on-disk coherence for speed.
- * The shm_open(3) library routine turns on the FPOSIXSHM
- * flag to request this behavior.
+ * Ensure that file and memory protections are
+ * compatible. Note that we only worry about
+ * writability if mapping is shared; in this case,
+ * current and max prot are dictated by the open file.
+ * XXX use the vnode instead? Problem is: what
+ * credentials do we use for determination? What if
+ * proc does a setuid?
*/
+ maxprot = VM_PROT_EXECUTE; /* ??? */
+ if (fp->f_flag & FREAD) {
+ maxprot |= VM_PROT_READ;
+ } else if (prot & PROT_READ) {
+ error = EACCES;
+ goto done;
+ }
+ }
+
+ /*
+ * Handle MEMFD descriptors. These reference the VM object directly.
+ */
+ if (fp && fp->f_type == DTYPE_MEMFD && fp->f_data) {
+ mtx_unlock(&Giant);
+ obj = fp->f_data;
+ vm_object_reference(obj);
+ error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
+ maxprot, flags, obj, pos);
+ if (error == 0)
+ td->td_retval[0] = (register_t) (addr + pageoff);
+ mtx_lock(&Giant);
+ vm_object_deallocate(obj);
+ goto done2;
+ }
+
+ /*
+ * Otherwise it must be an anonymous mapping or a VNODE
+ */
+ if (fp != NULL && fp->f_type != DTYPE_VNODE) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * POSIX shared-memory objects are defined to have
+ * kernel persistence, and are not defined to support
+ * read(2)/write(2) -- or even open(2). Thus, we can
+ * use MAP_ASYNC to trade on-disk coherence for speed.
+ * The shm_open(3) library routine turns on the FPOSIXSHM
+ * flag to request this behavior.
+ */
+ if (fp) {
if (fp->f_flag & FPOSIXSHM)
flags |= MAP_NOSYNC;
vp = fp->f_data;
@@ -363,22 +415,7 @@
error = EINVAL;
goto done;
}
- /*
- * Ensure that file and memory protections are
- * compatible. Note that we only worry about
- * writability if mapping is shared; in this case,
- * current and max prot are dictated by the open file.
- * XXX use the vnode instead? Problem is: what
- * credentials do we use for determination? What if
- * proc does a setuid?
- */
- maxprot = VM_PROT_EXECUTE; /* ??? */
- if (fp->f_flag & FREAD) {
- maxprot |= VM_PROT_READ;
- } else if (prot & PROT_READ) {
- error = EACCES;
- goto done;
- }
+
/*
* If we are sharing potential changes (either via
* MAP_SHARED or via the implicit sharing of character
@@ -414,17 +451,8 @@
handle = (void *)vp;
}
- }
-
- /*
- * Do not allow more then a certain number of vm_map_entry structures
- * per process. Scale with the number of rforks sharing the map
- * to make the limit reasonable for threads.
- */
- if (max_proc_mmap &&
- vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) {
- error = ENOMEM;
- goto done;
+ } else {
+ handle = NULL;
}
mtx_unlock(&Giant);
@@ -444,10 +472,10 @@
done:
if (vp)
vput(vp);
+done2:
mtx_unlock(&Giant);
if (fp)
fdrop(fp, td);
-
return (error);
}
@@ -1272,3 +1300,102 @@
return (EINVAL);
}
}
+
+/*
+ * vm_mmap_object()
+ *
+ *	MPSAFE
+ *
+ *	Internal version of mmap that directly operates on a VM object.
+ *	Returns 0 or an errno; on success *addr holds the mapping's address.
+ */
+int
+vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
+    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff)
+{
+	boolean_t fitit;
+	int rv;
+	int docow;
+	struct thread *td = curthread;
+
+	if (size == 0)
+		return (0);
+
+	size = round_page(size);
+
+	if (td->td_proc->p_vmspace->vm_map.size + size >
+	    td->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
+		return(ENOMEM);
+	}
+
+	/*
+	 * Unaligned object offsets would cause pmap inconsistencies.
+	 */
+	if (foff & PAGE_MASK)
+		return (EINVAL);
+
+	if ((flags & MAP_FIXED) == 0) {
+		fitit = TRUE;
+		*addr = round_page(*addr);
+	} else {
+		if (*addr != trunc_page(*addr))
+			return (EINVAL);
+		fitit = FALSE;
+		(void) vm_map_remove(map, *addr, *addr + size);
+	}
+
+	docow = MAP_PREFAULT_PARTIAL;
+	if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
+		docow |= MAP_COPY_ON_WRITE;
+	if (flags & MAP_NOCORE)
+		docow |= MAP_DISABLE_COREDUMP;
+
+#if defined(VM_PROT_READ_IS_EXEC)
+	if (prot & VM_PROT_READ)
+		prot |= VM_PROT_EXECUTE;
+
+	if (maxprot & VM_PROT_READ)
+		maxprot |= VM_PROT_EXECUTE;
+#endif
+
+	if (fitit)
+		*addr = pmap_addr_hint(object, *addr, size);
+
+	/*
+	 * Take the reference the new map entry will hold and give it back
+	 * if the insert fails.  vm_map_stack() is passed no object.
+	 */
+	if (flags & MAP_STACK) {
+		rv = vm_map_stack (map, *addr, size, prot, maxprot, docow);
+	} else {
+		vm_object_reference(object);
+		rv = vm_map_find(map, object, foff, addr, size, fitit,
+		    prot, maxprot, docow);
+		if (rv != KERN_SUCCESS)
+			vm_object_deallocate(object);
+	}
+
+	/*
+	 * Shared memory is also shared with children.  If that cannot be
+	 * set up, undo the mapping and report the failure to the caller.
+	 */
+	if (rv == KERN_SUCCESS && (flags & MAP_SHARED)) {
+		rv = vm_map_inherit(map, *addr, *addr + size,
+		    VM_INHERIT_SHARE);
+		if (rv != KERN_SUCCESS)
+			(void)vm_map_remove(map, *addr, *addr + size);
+	}
+
+	switch(rv) {
+	case KERN_SUCCESS:
+		return (0);
+	case KERN_INVALID_ADDRESS:
+	case KERN_NO_SPACE:
+		return (ENOMEM);
+	case KERN_PROTECTION_FAILURE:
+		return (EACCES);
+	default:
+		return (EINVAL);
+	}
+}
+
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-arch" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200301140411.h0E4BgpN078032>
