From owner-freebsd-arch Mon Jan 13 20:12: 1 2003 Delivered-To: freebsd-arch@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 6199D37B401 for ; Mon, 13 Jan 2003 20:11:43 -0800 (PST) Received: from apollo.backplane.com (apollo.backplane.com [216.240.41.2]) by mx1.FreeBSD.org (Postfix) with ESMTP id 8651D43F3F for ; Mon, 13 Jan 2003 20:11:42 -0800 (PST) (envelope-from dillon@apollo.backplane.com) Received: from apollo.backplane.com (localhost [127.0.0.1]) by apollo.backplane.com (8.12.6/8.12.6) with ESMTP id h0E4Bg0i078033; Mon, 13 Jan 2003 20:11:42 -0800 (PST) (envelope-from dillon@apollo.backplane.com) Received: (from dillon@localhost) by apollo.backplane.com (8.12.6/8.12.6/Submit) id h0E4BgpN078032; Mon, 13 Jan 2003 20:11:42 -0800 (PST) Date: Mon, 13 Jan 2003 20:11:42 -0800 (PST) From: Matthew Dillon Message-Id: <200301140411.h0E4BgpN078032@apollo.backplane.com> To: "Alan L. Cox" Cc: Peter Wemm , arch@FreeBSD.ORG Subject: getsysfd() patch #1 (Re: Virtual memory question) References: <20030114002831.1C8C12A89E@canning.wemm.org> <3E2381F8.85BB90A0@imimic.com> Sender: owner-freebsd-arch@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG This is a first-attempt workup of getsysfd(). See? I told ya it was trivial! This isn't everything. If we really want to do this right, we need to create a filesystem inode type to represent a memory rendezvous, similar to how we represent a FIFO or SOCKET rendezvous. If we do that, then we can support all shm_open() situations using this new call. I have only done a small amount of testing; I have not double-checked that I handle the reference counts properly, and I had to reorganize mmap() quite a bit (in fact, it looks like someone did a bunch of rewriting in the mmap()/vm_mmap() code and we really need to rewrite the layering). Here is a test program. 
The patch is below this program. This should be considered a 'test' patch for the moment, my heart isn't set on the interface. e.g. perhaps we want to add additional arguments to make it more useful/generic. -Matt #include #include #include #include #include int main(int ac, char **av) { int fd = getsysfd(SYSFD_MEMORY, 1024*1024); char *ptr1; char *ptr2; printf("fd = %d %d %s\n", fd, errno, strerror(errno)); errno = 0; ptr1 = mmap(NULL, 1024*1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); printf("mmap: %p (%s)\n", ptr1, strerror(errno)); errno = 0; ptr2 = mmap(NULL, 1024*1024, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); printf("mmap: %p (%s)\n", ptr2, strerror(errno)); close(fd); ptr1[0] = 1; ptr1[1024*1024-1] = 2; if (fork() == 0) { printf("CONTENTS %d %d\n", ptr2[0], ptr2[1024*1024-1]); ptr2[0] = 2; /* modify private mapping */ ptr1[1024*1024-1] = 3; /* modify original */ } sleep(1); /* SHOULD BE 1 3 */ printf("ORIGCONTENTS %d %d\n", ptr1[0], ptr1[1024*1024-1]); return(0); } Index: conf/files =================================================================== RCS file: /home/ncvs/src/sys/conf/files,v retrieving revision 1.744 diff -u -r1.744 files --- conf/files 8 Jan 2003 23:36:59 -0000 1.744 +++ conf/files 14 Jan 2003 02:30:47 -0000 @@ -1055,6 +1055,7 @@ kern/subr_xxx.c standard kern/sys_generic.c standard kern/sys_pipe.c standard +kern/sys_sysfd.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/syscalls.c optional witness Index: kern/init_sysent.c =================================================================== RCS file: /home/ncvs/src/sys/kern/init_sysent.c,v retrieving revision 1.146 diff -u -r1.146 init_sysent.c --- kern/init_sysent.c 8 Jan 2003 04:57:52 -0000 1.146 +++ kern/init_sysent.c 14 Jan 2003 01:58:05 -0000 @@ -2,7 +2,7 @@ * System call switch table. * * DO NOT EDIT-- this file is automatically generated. 
- * $FreeBSD: src/sys/kern/init_sysent.c,v 1.146 2003/01/08 04:57:52 davidxu Exp $ + * $FreeBSD$ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp */ @@ -457,4 +457,5 @@ { SYF_MPSAFE | AS(__acl_set_link_args), (sy_call_t *)__acl_set_link }, /* 426 = __acl_set_link */ { SYF_MPSAFE | AS(__acl_delete_link_args), (sy_call_t *)__acl_delete_link }, /* 427 = __acl_delete_link */ { SYF_MPSAFE | AS(__acl_aclcheck_link_args), (sy_call_t *)__acl_aclcheck_link }, /* 428 = __acl_aclcheck_link */ + { SYF_MPSAFE | AS(getsysfd_args), (sy_call_t *)getsysfd }, /* 429 = getsysfd */ }; Index: kern/sys_sysfd.c =================================================================== RCS file: kern/sys_sysfd.c diff -N kern/sys_sysfd.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ kern/sys_sysfd.c 14 Jan 2003 03:47:53 -0000 @@ -0,0 +1,208 @@ +/* + * KERN/SYS_SYSFD.C + * + * $FreeBSD$ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * interfaces to the outside world + */ +static fo_rdwr_t memfd_read; +static fo_rdwr_t memfd_write; +static fo_ioctl_t memfd_ioctl; +static fo_poll_t memfd_poll; +static fo_stat_t memfd_stat; +static fo_close_t memfd_close; + +static struct fileops memfdops = { + memfd_read, memfd_write, memfd_ioctl, memfd_poll, NULL, + memfd_stat, memfd_close +}; + +/* + * The getsysfd() system call. getsysfd(int type, off_t size) + * + * SYSFD_MEMORY - Return a descriptor which can be mmap()'d, + * representing anonymous, shareable swap-backed + * memory. 
+ * + */ + +int +getsysfd(struct thread *td, struct getsysfd_args *uap) +{ + int error; + int fd; + vm_pindex_t npages; + struct file *fp; + struct filedesc *fdp; + + /* + * Validate the size + */ + printf("GETSYSFD %d %lld\n", uap->type, (long long)uap->size); + if (uap->size < 0) + return(EINVAL); + npages = round_page(uap->size) >> PAGE_SHIFT; + + /* + * Allocate a new descriptor. the descriptor will be returned with a + * reference associated with fd_ofiles[fd]. + * + * XXX falloc() really should return with two references on the desc, + * not one, so it can't be ripped out from under us. + */ + error = falloc(td, &fp, &fd); + if (error) + return(error); + fhold(fp); + FILE_LOCK(fp); + fp->f_flag = FREAD | FWRITE; + + switch(uap->type) { + case SYSFD_MEMORY: + fp->f_type = DTYPE_MEMFD; + fp->f_data = vm_object_allocate(OBJT_DEFAULT, npages); + fp->f_ops = &memfdops; + if (fp->f_data == NULL) + error = ENOMEM; + break; + default: + error = EINVAL; + break; + } + FILE_UNLOCK(fp); + if (error) { + fdp = td->td_proc->p_fd; + FILEDESC_LOCK(fdp); + if (fdp->fd_ofiles[fd] == fp) { + fdp->fd_ofiles[fd] = NULL; + fdp->fd_ofileflags[fd] = 0; + fdrop(fp, td); /* drop ofiles[] array reference */ + if (fd < fdp->fd_freefile) + fdp->fd_freefile = fd; + } + FILEDESC_UNLOCK(fdp); + /* closef(fp, td); NOT NECESSARY */ + } else { + td->td_retval[0] = fd; + } + fdrop(fp, td); /* drop our reference */ + return(error); +} + +/* ARGSUSED */ +static int +memfd_read(fp, uio, active_cred, flags, td) + struct file *fp; + struct uio *uio; + struct ucred *active_cred; + struct thread *td; + int flags; +{ + return(EOPNOTSUPP); +} + +static int +memfd_write(fp, uio, active_cred, flags, td) + struct file *fp; + struct uio *uio; + struct ucred *active_cred; + struct thread *td; + int flags; +{ + return(EOPNOTSUPP); +} + +/* + * we implement a very minimal set of ioctls for compatibility with sockets. 
+ */ +static int +memfd_ioctl(fp, cmd, data, active_cred, td) + struct file *fp; + u_long cmd; + void *data; + struct ucred *active_cred; + struct thread *td; +{ + return(EINVAL); +} + +static int +memfd_poll(fp, events, active_cred, td) + struct file *fp; + int events; + struct ucred *active_cred; + struct thread *td; +{ + return(0); +} + +/* + * We shouldn't need locks here as we're doing a read and this should + * be a natural race. + */ +static int +memfd_stat(fp, ub, active_cred, td) + struct file *fp; + struct stat *ub; + struct ucred *active_cred; + struct thread *td; +{ + return(EOPNOTSUPP); +} + +/* ARGSUSED */ +static int +memfd_close(fp, td) + struct file *fp; + struct thread *td; +{ + vm_object_t object; + + FILE_LOCK(fp); + object = fp->f_data; + fp->f_data = NULL; + FILE_UNLOCK(fp); + + mtx_lock(&Giant); + if (object) + vm_object_deallocate(object); + mtx_unlock(&Giant); + return(0); +} + Index: kern/syscalls.c =================================================================== RCS file: /home/ncvs/src/sys/kern/syscalls.c,v retrieving revision 1.132 diff -u -r1.132 syscalls.c --- kern/syscalls.c 8 Jan 2003 04:57:52 -0000 1.132 +++ kern/syscalls.c 14 Jan 2003 01:58:05 -0000 @@ -2,7 +2,7 @@ * System call names. * * DO NOT EDIT-- this file is automatically generated. 
- * $FreeBSD: src/sys/kern/syscalls.c,v 1.132 2003/01/08 04:57:52 davidxu Exp $ + * $FreeBSD$ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp */ @@ -436,4 +436,5 @@ "__acl_set_link", /* 426 = __acl_set_link */ "__acl_delete_link", /* 427 = __acl_delete_link */ "__acl_aclcheck_link", /* 428 = __acl_aclcheck_link */ + "getsysfd", /* 429 = getsysfd */ }; Index: kern/syscalls.master =================================================================== RCS file: /home/ncvs/src/sys/kern/syscalls.master,v retrieving revision 1.140 diff -u -r1.140 syscalls.master --- kern/syscalls.master 4 Jan 2003 11:41:12 -0000 1.140 +++ kern/syscalls.master 14 Jan 2003 01:58:03 -0000 @@ -621,6 +621,7 @@ acl_type_t type); } 428 MSTD BSD { int __acl_aclcheck_link(const char *path, \ acl_type_t type, struct acl *aclp); } +429 MSTD BSD { int getsysfd(int type, off_t size); } ; Please copy any additions and changes to the following compatability tables: ; sys/ia64/ia32/syscalls.master (take a best guess) Index: sys/file.h =================================================================== RCS file: /home/ncvs/src/sys/sys/file.h,v retrieving revision 1.59 diff -u -r1.59 file.h --- sys/file.h 13 Jan 2003 00:28:55 -0000 1.59 +++ sys/file.h 14 Jan 2003 02:04:13 -0000 @@ -62,6 +62,7 @@ #define DTYPE_FIFO 4 /* fifo (named pipe) */ #define DTYPE_KQUEUE 5 /* event queue */ #define DTYPE_CRYPTO 6 /* crypto */ +#define DTYPE_MEMFD 7 /* memory descriptor */ #ifdef _KERNEL Index: sys/syscall.h =================================================================== RCS file: /home/ncvs/src/sys/sys/syscall.h,v retrieving revision 1.130 diff -u -r1.130 syscall.h --- sys/syscall.h 8 Jan 2003 04:57:52 -0000 1.130 +++ sys/syscall.h 14 Jan 2003 01:58:05 -0000 @@ -2,7 +2,7 @@ * System call numbers. * * DO NOT EDIT-- this file is automatically generated. 
- * $FreeBSD: src/sys/sys/syscall.h,v 1.130 2003/01/08 04:57:52 davidxu Exp $ + * $FreeBSD$ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp */ @@ -334,4 +334,5 @@ #define SYS___acl_set_link 426 #define SYS___acl_delete_link 427 #define SYS___acl_aclcheck_link 428 -#define SYS_MAXSYSCALL 429 +#define SYS_getsysfd 429 +#define SYS_MAXSYSCALL 430 Index: sys/syscall.mk =================================================================== RCS file: /home/ncvs/src/sys/sys/syscall.mk,v retrieving revision 1.85 diff -u -r1.85 syscall.mk --- sys/syscall.mk 8 Jan 2003 04:57:52 -0000 1.85 +++ sys/syscall.mk 14 Jan 2003 01:58:05 -0000 @@ -1,6 +1,6 @@ # FreeBSD system call names. # DO NOT EDIT-- this file is automatically generated. -# $FreeBSD: src/sys/sys/syscall.mk,v 1.85 2003/01/08 04:57:52 davidxu Exp $ +# $FreeBSD$ # created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp MIASM = \ syscall.o \ @@ -279,4 +279,5 @@ __acl_get_link.o \ __acl_set_link.o \ __acl_delete_link.o \ - __acl_aclcheck_link.o + __acl_aclcheck_link.o \ + getsysfd.o Index: sys/sysfd.h =================================================================== RCS file: sys/sysfd.h diff -N sys/sysfd.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sysfd.h 14 Jan 2003 04:06:19 -0000 @@ -0,0 +1,21 @@ +/* + * $FreeBSD$ + */ + +#ifndef _SYS_SYSFD_H_ +#define _SYS_SYSFD_H_ + +#define SYSFD_MEMORY 1 +#ifdef NOTYET +#define SYSFD_TIMER_SECS 2 +#define SYSFD_TIMER_TENS 3 +#define SYSFD_TIMER_MICRO 4 +#define SYSFD_TIMER_SYS 5 +#define SYSFD_TIMER_REAL 6 +#define SYSFD_TIMER_VIRT 7 +#endif + +#endif /* _SYS_SYSFD_H_ */ + +extern int getsysfd(int type, off_t size); + Index: sys/sysproto.h =================================================================== RCS file: /home/ncvs/src/sys/sys/sysproto.h,v retrieving revision 1.123 diff -u -r1.123 sysproto.h --- sys/sysproto.h 8 Jan 2003 04:57:53 -0000 1.123 +++ sys/sysproto.h 14 Jan 2003 01:58:05 
-0000 @@ -2,7 +2,7 @@ * System call prototypes. * * DO NOT EDIT-- this file is automatically generated. - * $FreeBSD: src/sys/sys/sysproto.h,v 1.123 2003/01/08 04:57:53 davidxu Exp $ + * $FreeBSD$ * created from FreeBSD: src/sys/kern/syscalls.master,v 1.140 2003/01/04 11:41:12 davidxu Exp */ @@ -1223,6 +1223,10 @@ char type_l_[PADL_(acl_type_t)]; acl_type_t type; char type_r_[PADR_(acl_type_t)]; char aclp_l_[PADL_(struct acl *)]; struct acl * aclp; char aclp_r_[PADR_(struct acl *)]; }; +struct getsysfd_args { + char type_l_[PADL_(int)]; int type; char type_r_[PADR_(int)]; + char size_l_[PADL_(off_t)]; off_t size; char size_r_[PADR_(off_t)]; +}; int nosys(struct thread *, struct nosys_args *); void sys_exit(struct thread *, struct sys_exit_args *); int fork(struct thread *, struct fork_args *); @@ -1499,6 +1503,7 @@ int __acl_set_link(struct thread *, struct __acl_set_link_args *); int __acl_delete_link(struct thread *, struct __acl_delete_link_args *); int __acl_aclcheck_link(struct thread *, struct __acl_aclcheck_link_args *); +int getsysfd(struct thread *, struct getsysfd_args *); #ifdef COMPAT_43 Index: vm/vm_extern.h =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_extern.h,v retrieving revision 1.59 diff -u -r1.59 vm_extern.h --- vm/vm_extern.h 24 Jul 2002 19:47:56 -0000 1.59 +++ vm/vm_extern.h 14 Jan 2003 03:12:06 -0000 @@ -80,6 +80,7 @@ void vm_forkproc(struct thread *, struct proc *, struct thread *, int); void vm_waitproc(struct proc *); int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, void *, vm_ooffset_t); +int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, vm_object_t, vm_ooffset_t); vm_offset_t vm_page_alloc_contig(vm_offset_t, vm_offset_t, vm_offset_t, vm_offset_t); void vm_set_page_size(void); struct vmspace *vmspace_alloc(vm_offset_t, vm_offset_t); Index: vm/vm_mmap.c =================================================================== RCS 
file: /home/ncvs/src/sys/vm/vm_mmap.c,v retrieving revision 1.155 diff -u -r1.155 vm_mmap.c --- vm/vm_mmap.c 13 Jan 2003 00:28:55 -0000 1.155 +++ vm/vm_mmap.c 14 Jan 2003 03:55:15 -0000 @@ -201,7 +201,7 @@ struct thread *td; struct mmap_args *uap; { - struct file *fp = NULL; + struct file *fp; struct vnode *vp; vm_offset_t addr; vm_size_t size, pageoff; @@ -264,49 +264,101 @@ return (EINVAL); if (addr + size < addr) return (EINVAL); - } - /* - * XXX for non-fixed mappings where no hint is provided or - * the hint would fall in the potential heap space, - * place it after the end of the largest possible heap. - * - * There should really be a pmap call to determine a reasonable - * location. - */ - else if (addr == 0 || + } else if (addr == 0 || (addr >= round_page((vm_offset_t)vms->vm_taddr) && - addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz))) + addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz))) { + /* + * XXX for non-fixed mappings where no hint is provided or + * the hint would fall in the potential heap space, + * place it after the end of the largest possible heap. + * + * There should really be a pmap call to determine a reasonable + * location. + */ addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz); + } mtx_lock(&Giant); /* syscall marked mp-safe but isn't */ + + /* + * Do not allow more then a certain number of vm_map_entry structures + * per process. Scale with the number of rforks sharing the map + * to make the limit reasonable for threads. + */ + if (max_proc_mmap && + vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) { + error = ENOMEM; + goto done; + } + + /* + * Extract the file descriptor (if not an anonymous mmap) + */ if (flags & MAP_ANON) { /* * Mapping blank space is trivial. */ - handle = NULL; maxprot = VM_PROT_ALL; pos = 0; } else { /* - * Mapping file, get fp for validation. Obtain vnode and make - * sure it is of appropriate type. 
- * don't let the descriptor disappear on us if we block + * Mapping a file descriptor. Reference the fp so it does + * not go away on us. */ if ((error = fget(td, uap->fd, &fp)) != 0) goto done; - if (fp->f_type != DTYPE_VNODE) { - error = EINVAL; - goto done; - } /* - * POSIX shared-memory objects are defined to have - * kernel persistence, and are not defined to support - * read(2)/write(2) -- or even open(2). Thus, we can - * use MAP_ASYNC to trade on-disk coherence for speed. - * The shm_open(3) library routine turns on the FPOSIXSHM - * flag to request this behavior. + * Ensure that file and memory protections are + * compatible. Note that we only worry about + * writability if mapping is shared; in this case, + * current and max prot are dictated by the open file. + * XXX use the vnode instead? Problem is: what + * credentials do we use for determination? What if + * proc does a setuid? */ + maxprot = VM_PROT_EXECUTE; /* ??? */ + if (fp->f_flag & FREAD) { + maxprot |= VM_PROT_READ; + } else if (prot & PROT_READ) { + error = EACCES; + goto done; + } + } + + /* + * Handle MEMFD descriptors. These reference the VM object directly. + */ + if (fp && fp->f_type == DTYPE_MEMFD && fp->f_data) { + mtx_unlock(&Giant); + obj = fp->f_data; + vm_object_reference(obj); + error = vm_mmap_object(&vms->vm_map, &addr, size, prot, + maxprot, flags, obj, pos); + if (error == 0) + td->td_retval[0] = (register_t) (addr + pageoff); + mtx_lock(&Giant); + vm_object_deallocate(obj); + goto done2; + } + + /* + * Otherwise it must be an anonymous mapping or a VNODE + */ + if (fp != NULL && fp->f_type != DTYPE_VNODE) { + error = EINVAL; + goto done; + } + + /* + * POSIX shared-memory objects are defined to have + * kernel persistence, and are not defined to support + * read(2)/write(2) -- or even open(2). Thus, we can + * use MAP_ASYNC to trade on-disk coherence for speed. + * The shm_open(3) library routine turns on the FPOSIXSHM + * flag to request this behavior. 
+ */ + if (fp) { if (fp->f_flag & FPOSIXSHM) flags |= MAP_NOSYNC; vp = fp->f_data; @@ -363,22 +415,7 @@ error = EINVAL; goto done; } - /* - * Ensure that file and memory protections are - * compatible. Note that we only worry about - * writability if mapping is shared; in this case, - * current and max prot are dictated by the open file. - * XXX use the vnode instead? Problem is: what - * credentials do we use for determination? What if - * proc does a setuid? - */ - maxprot = VM_PROT_EXECUTE; /* ??? */ - if (fp->f_flag & FREAD) { - maxprot |= VM_PROT_READ; - } else if (prot & PROT_READ) { - error = EACCES; - goto done; - } + /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character @@ -414,17 +451,8 @@ handle = (void *)vp; } - } - - /* - * Do not allow more then a certain number of vm_map_entry structures - * per process. Scale with the number of rforks sharing the map - * to make the limit reasonable for threads. - */ - if (max_proc_mmap && - vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) { - error = ENOMEM; - goto done; + } else { + handle = NULL; } mtx_unlock(&Giant); @@ -444,10 +472,10 @@ done: if (vp) vput(vp); +done2: mtx_unlock(&Giant); if (fp) fdrop(fp, td); - return (error); } @@ -1272,3 +1300,102 @@ return (EINVAL); } } + +/* + * vm_mmap_object() + * + * MPSAFE + * + * Internal version of mmap that directly operates on a VM object. + * Currently used by mmap. + */ +int +vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, + vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff) +{ + boolean_t fitit; + int rv = KERN_SUCCESS; + int docow; + struct thread *td = curthread; + + if (size == 0) + return (0); + + size = round_page(size); + + if (td->td_proc->p_vmspace->vm_map.size + size > + td->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) { + return(ENOMEM); + } + + /* + * We currently can only deal with page aligned file offsets. 
+ * The check is here rather than in the syscall because the + * kernel calls this function internally for other mmaping + * operations (such as in exec) and non-aligned offsets will + * cause pmap inconsistencies...so we want to be sure to + * disallow this in all cases. + */ + if (foff & PAGE_MASK) + return (EINVAL); + + if ((flags & MAP_FIXED) == 0) { + fitit = TRUE; + *addr = round_page(*addr); + } else { + if (*addr != trunc_page(*addr)) + return (EINVAL); + fitit = FALSE; + (void) vm_map_remove(map, *addr, *addr + size); + } + + docow = MAP_PREFAULT_PARTIAL; + + if ((flags & (MAP_ANON|MAP_SHARED)) == 0) + docow |= MAP_COPY_ON_WRITE; + if (flags & MAP_NOCORE) + docow |= MAP_DISABLE_COREDUMP; + +#if defined(VM_PROT_READ_IS_EXEC) + if (prot & VM_PROT_READ) + prot |= VM_PROT_EXECUTE; + + if (maxprot & VM_PROT_READ) + maxprot |= VM_PROT_EXECUTE; +#endif + + if (fitit) + *addr = pmap_addr_hint(object, *addr, size); + + vm_object_reference(object); + if (flags & MAP_STACK) { + rv = vm_map_stack (map, *addr, size, prot, maxprot, docow); + } else { + rv = vm_map_find(map, object, foff, addr, size, fitit, + prot, maxprot, docow); + } + if (rv != KERN_SUCCESS) + vm_object_deallocate(object); + + switch(rv) { + case KERN_SUCCESS: + if (flags & MAP_SHARED) { + /* + * Shared memory is also shared with children. + */ + rv = vm_map_inherit(map, *addr, *addr + size, + VM_INHERIT_SHARE); + if (rv != KERN_SUCCESS) + (void)vm_map_remove(map, *addr, *addr + size); + } + return(0); + case KERN_INVALID_ADDRESS: + case KERN_NO_SPACE: + return (ENOMEM); + case KERN_PROTECTION_FAILURE: + return (EACCES); + default: + return (EINVAL); + } +} + To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-arch" in the body of the message