Date:      Thu, 4 Jun 2015 19:41:16 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r283998 - in head/sys: dev/drm dev/drm2 fs/devfs kern sys vm
Message-ID:  <201506041941.t54JfGAA075455@svn.freebsd.org>

Author: jhb
Date: Thu Jun  4 19:41:15 2015
New Revision: 283998
URL: https://svnweb.freebsd.org/changeset/base/283998

Log:
  Add a new file operations hook for mmap operations.  File type-specific
  logic is now placed in the mmap hook implementation rather than requiring
  it to be placed in sys/vm/vm_mmap.c.  This hook allows new file types to
  support mmap() as well as potentially allowing mmap() for existing file
  types that do not currently support any mapping.
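
  For reference, the shape of the new hook as introduced in sys/sys/file.h
  by this change (copied from the diff below):

	typedef int fo_mmap_t(struct file *fp, vm_map_t map, vm_offset_t *addr,
			    vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot,
			    int flags, vm_ooffset_t foff, struct thread *td);

  Implementations receive the descriptor, the target map, and the caller's
  requested protection and flags, and are expected to hand a referenced VM
  object to vm_mmap_object() (see the hook bodies added to devfs_vnops.c,
  uipc_shm.c, and vfs_vnops.c below).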
  
  The vm_mmap() function is now split up into two functions.  A new
  vm_mmap_object() function handles the "back half" of vm_mmap() and accepts
  a referenced VM object to map rather than a (handle, handle_type) tuple.
  vm_mmap() is now reduced to converting a (handle, handle_type) tuple to
  a VM object and then calling vm_mmap_object() to handle the actual mapping.
  The vm_mmap() function remains for use by other parts of the kernel
  (e.g. device drivers and exec) but now only supports mapping vnodes,
  character devices, and anonymous memory.
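
  The resulting kernel-facing prototypes, as declared in sys/vm/vm_extern.h
  in this change:

	int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int,
	    objtype_t, void *, vm_ooffset_t);
	int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t,
	    vm_prot_t, int, vm_object_t, vm_ooffset_t, boolean_t, struct thread *);

  The vm_mmap_cdev() and vm_mmap_vnode() helpers are also exported from
  vm_mmap.c so that the devfs and vnode fo_mmap() hooks can call them
  directly.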
  
  The mmap() system call invokes vm_mmap_object() directly with a NULL object
  for anonymous mappings.  For mappings using a file descriptor, the
  descriptor's fo_mmap() hook is invoked instead.  The fo_mmap() hook is
  responsible for performing type-specific checks and adjustments to
  arguments as well as possibly modifying mapping parameters such as flags
  or the object offset.  The fo_mmap() hook routines then call
  vm_mmap_object() to handle the actual mapping.
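
  Condensed, the new dispatch in sys_mmap() looks like this (abridged from
  the vm_mmap.c hunk below; argument validation and fd lookup omitted):

	if (size == 0) {
		/* Old binaries may map length 0; succeed without mapping. */
		error = 0;
	} else if (flags & MAP_ANON) {
		/* Anonymous memory: map a NULL object directly. */
		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
	} else {
		/* File-backed: defer type-specific work to the descriptor. */
		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
		    cap_maxprot, flags, pos, td);
	}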
  
  The fo_mmap() hook is optional.  If it is not set, then fo_mmap() will
  fail with ENODEV.  A fo_mmap() hook is implemented for regular files,
  character devices, and shared memory objects (created via shm_open()).
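
  The ENODEV fallback lives in the fo_mmap() wrapper added to sys/sys/file.h
  (reproduced from the diff below):

	static __inline int
	fo_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
	    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
	    struct thread *td)
	{

		if (fp->f_ops->fo_mmap == NULL)
			return (ENODEV);
		return ((*fp->f_ops->fo_mmap)(fp, map, addr, size, prot, cap_maxprot,
		    flags, foff, td));
	}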
  
  While here, consistently use the VM_PROT_* constants of the vm_prot_t
  type for the 'prot' variable passed to vm_mmap() and vm_mmap_object()
  as well as to the vm_mmap_vnode() and vm_mmap_cdev() helper routines.
  Previously some places were using the mmap()-specific PROT_* constants
  instead.  While this happens to work because PROT_xx == VM_PROT_xx,
  using VM_PROT_* is more correct.
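
  For context, the two constant sets are numerically identical (values as
  defined in sys/sys/mman.h and sys/vm/vm.h at the time of this commit;
  shown only to illustrate why mixing them happened to work):

	#define	PROT_READ	0x01	/* == VM_PROT_READ */
	#define	PROT_WRITE	0x02	/* == VM_PROT_WRITE */
	#define	PROT_EXEC	0x04	/* == VM_PROT_EXECUTE */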
  
  Differential Revision:	https://reviews.freebsd.org/D2658
  Reviewed by:	alc (glanced over), kib
  MFC after:	1 month
  Sponsored by:	Chelsio

Modified:
  head/sys/dev/drm/drm_bufs.c
  head/sys/dev/drm2/drm_bufs.c
  head/sys/fs/devfs/devfs_vnops.c
  head/sys/kern/subr_uio.c
  head/sys/kern/uipc_shm.c
  head/sys/kern/vfs_vnops.c
  head/sys/sys/file.h
  head/sys/sys/mman.h
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_mmap.c

Modified: head/sys/dev/drm/drm_bufs.c
==============================================================================
--- head/sys/dev/drm/drm_bufs.c	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/dev/drm/drm_bufs.c	Thu Jun  4 19:41:15 2015	(r283998)
@@ -1067,12 +1067,12 @@ int drm_mapbufs(struct drm_device *dev, 
 
 	vaddr = round_page((vm_offset_t)vms->vm_daddr + MAXDSIZ);
 #if __FreeBSD_version >= 600023
-	retcode = vm_mmap(&vms->vm_map, &vaddr, size, PROT_READ | PROT_WRITE,
-	    VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC, OBJT_DEVICE,
+	retcode = vm_mmap(&vms->vm_map, &vaddr, size, VM_PROT_READ |
+	    VM_PROT_WRITE, VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC, OBJT_DEVICE,
 	    dev->devnode, foff);
 #else
-	retcode = vm_mmap(&vms->vm_map, &vaddr, size, PROT_READ | PROT_WRITE,
-	    VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC,
+	retcode = vm_mmap(&vms->vm_map, &vaddr, size, VM_PROT_READ |
+	    VM_PROT_WRITE, VM_PROT_ALL, MAP_SHARED | MAP_NOSYNC,
 	    SLIST_FIRST(&dev->devnode->si_hlist), foff);
 #endif
 	if (retcode)

Modified: head/sys/dev/drm2/drm_bufs.c
==============================================================================
--- head/sys/dev/drm2/drm_bufs.c	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/dev/drm2/drm_bufs.c	Thu Jun  4 19:41:15 2015	(r283998)
@@ -1635,12 +1635,12 @@ int drm_mapbufs(struct drm_device *dev, 
 				goto done;
 			}
 			retcode = vm_mmap(&vms->vm_map, &virtual, map->size,
-			    PROT_READ | PROT_WRITE, VM_PROT_ALL,
+			    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL,
 			    MAP_SHARED | MAP_NOSYNC, OBJT_DEVICE,
 			    file_priv->minor->device, token);
 		} else {
 			retcode = vm_mmap(&vms->vm_map, &virtual, dma->byte_count,
-			    PROT_READ | PROT_WRITE, VM_PROT_ALL,
+			    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_ALL,
 			    MAP_SHARED | MAP_NOSYNC, OBJT_DEVICE,
 			    file_priv->minor->device, 0);
 		}

Modified: head/sys/fs/devfs/devfs_vnops.c
==============================================================================
--- head/sys/fs/devfs/devfs_vnops.c	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/fs/devfs/devfs_vnops.c	Thu Jun  4 19:41:15 2015	(r283998)
@@ -51,6 +51,7 @@
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
@@ -71,6 +72,10 @@ static struct fileops devfs_ops_f;
 
 #include <security/mac/mac_framework.h>
 
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
+
 static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data");
 
 struct mtx	devfs_de_interlock;
@@ -1738,6 +1743,65 @@ devfs_write_f(struct file *fp, struct ui
 	return (error);
 }
 
+static int
+devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
+    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
+    struct thread *td)
+{
+	struct cdev *dev;
+	struct cdevsw *dsw;
+	struct mount *mp;
+	struct vnode *vp;
+	struct file *fpop;
+	vm_object_t object;
+	vm_prot_t maxprot;
+	int error, ref;
+
+	vp = fp->f_vnode;
+
+	/*
+	 * Ensure that file and memory protections are
+	 * compatible.
+	 */
+	mp = vp->v_mount;
+	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0)
+		maxprot = VM_PROT_NONE;
+	else
+		maxprot = VM_PROT_EXECUTE;
+	if ((fp->f_flag & FREAD) != 0)
+		maxprot |= VM_PROT_READ;
+	else if ((prot & VM_PROT_READ) != 0)
+		return (EACCES);
+
+	/*
+	 * Character devices always share mappings, so
+	 * require a writable fd for writable mappings.
+	 */
+	if ((fp->f_flag & FWRITE) != 0)
+		maxprot |= VM_PROT_WRITE;
+	else if ((prot & VM_PROT_WRITE) != 0)
+		return (EACCES);
+	maxprot &= cap_maxprot;
+
+	fpop = td->td_fpop;
+	error = devfs_fp_check(fp, &dev, &dsw, &ref);
+	if (error != 0)
+		return (error);
+
+	error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff,
+	    &object);
+	td->td_fpop = fpop;
+	dev_relthread(dev, ref);
+	if (error != 0)
+		return (error);
+
+	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
+	    foff, FALSE, td);
+	if (error != 0)
+		vm_object_deallocate(object);
+	return (error);
+}
+
 dev_t
 dev2udev(struct cdev *x)
 {
@@ -1760,6 +1824,7 @@ static struct fileops devfs_ops_f = {
 	.fo_sendfile =	vn_sendfile,
 	.fo_seek =	vn_seek,
 	.fo_fill_kinfo = vn_fill_kinfo,
+	.fo_mmap =	devfs_mmap_f,
 	.fo_flags =	DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 

Modified: head/sys/kern/subr_uio.c
==============================================================================
--- head/sys/kern/subr_uio.c	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/kern/subr_uio.c	Thu Jun  4 19:41:15 2015	(r283998)
@@ -417,7 +417,7 @@ copyout_map(struct thread *td, vm_offset
 	/* round size up to page boundry */
 	size = (vm_size_t)round_page(sz);
 
-	error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE,
+	error = vm_mmap(&vms->vm_map, addr, size, VM_PROT_READ | VM_PROT_WRITE,
 	    VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0);
 
 	return (error);

Modified: head/sys/kern/uipc_shm.c
==============================================================================
--- head/sys/kern/uipc_shm.c	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/kern/uipc_shm.c	Thu Jun  4 19:41:15 2015	(r283998)
@@ -127,6 +127,7 @@ static fo_chmod_t	shm_chmod;
 static fo_chown_t	shm_chown;
 static fo_seek_t	shm_seek;
 static fo_fill_kinfo_t	shm_fill_kinfo;
+static fo_mmap_t	shm_mmap;
 
 /* File descriptor operations. */
 static struct fileops shm_ops = {
@@ -143,6 +144,7 @@ static struct fileops shm_ops = {
 	.fo_sendfile = vn_sendfile,
 	.fo_seek = shm_seek,
 	.fo_fill_kinfo = shm_fill_kinfo,
+	.fo_mmap = shm_mmap,
 	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
@@ -851,15 +853,37 @@ sys_shm_unlink(struct thread *td, struct
 	return (error);
 }
 
-/*
- * mmap() helper to validate mmap() requests against shm object state
- * and give mmap() the vm_object to use for the mapping.
- */
 int
-shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
-    vm_object_t *obj)
+shm_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t objsize,
+    vm_prot_t prot, vm_prot_t cap_maxprot, int flags,
+    vm_ooffset_t foff, struct thread *td)
 {
+	struct shmfd *shmfd;
+	vm_prot_t maxprot;
+	int error;
+
+	shmfd = fp->f_data;
+	maxprot = VM_PROT_NONE;
+
+	/* FREAD should always be set. */
+	if ((fp->f_flag & FREAD) != 0)
+		maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
+	if ((fp->f_flag & FWRITE) != 0)
+		maxprot |= VM_PROT_WRITE;
+
+	/* Don't permit shared writable mappings on read-only descriptors. */
+	if ((flags & MAP_SHARED) != 0 &&
+	    (maxprot & VM_PROT_WRITE) == 0 &&
+	    (prot & VM_PROT_WRITE) != 0)
+		return (EACCES);
+	maxprot &= cap_maxprot;
 
+#ifdef MAC
+	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, flags);
+	if (error != 0)
+		return (error);
+#endif
+	
 	/*
 	 * XXXRW: This validation is probably insufficient, and subject to
 	 * sign errors.  It should be fixed.
@@ -872,7 +896,11 @@ shm_mmap(struct shmfd *shmfd, vm_size_t 
 	vfs_timestamp(&shmfd->shm_atime);
 	mtx_unlock(&shm_timestamp_lock);
 	vm_object_reference(shmfd->shm_object);
-	*obj = shmfd->shm_object;
+
+	error = vm_mmap_object(map, addr, objsize, prot, maxprot, flags,
+	    shmfd->shm_object, foff, FALSE, td);
+	if (error != 0)
+		vm_object_deallocate(shmfd->shm_object);
 	return (0);
 }
 

Modified: head/sys/kern/vfs_vnops.c
==============================================================================
--- head/sys/kern/vfs_vnops.c	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/kern/vfs_vnops.c	Thu Jun  4 19:41:15 2015	(r283998)
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
+#include <sys/mman.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
@@ -80,6 +81,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
+#include <vm/vnode_pager.h>
 
 static fo_rdwr_t	vn_read;
 static fo_rdwr_t	vn_write;
@@ -90,6 +92,7 @@ static fo_poll_t	vn_poll;
 static fo_kqfilter_t	vn_kqfilter;
 static fo_stat_t	vn_statfile;
 static fo_close_t	vn_closefile;
+static fo_mmap_t	vn_mmap;
 
 struct 	fileops vnops = {
 	.fo_read = vn_io_fault,
@@ -105,6 +108,7 @@ struct 	fileops vnops = {
 	.fo_sendfile = vn_sendfile,
 	.fo_seek = vn_seek,
 	.fo_fill_kinfo = vn_fill_kinfo,
+	.fo_mmap = vn_mmap,
 	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
 };
 
@@ -2362,3 +2366,95 @@ vn_fill_kinfo_vnode(struct vnode *vp, st
 	kif->kf_un.kf_file.kf_file_rdev = va.va_rdev;
 	return (0);
 }
+
+int
+vn_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
+    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
+    struct thread *td)
+{
+#ifdef HWPMC_HOOKS
+	struct pmckern_map_in pkm;
+#endif
+	struct mount *mp;
+	struct vnode *vp;
+	vm_object_t object;
+	vm_prot_t maxprot;
+	boolean_t writecounted;
+	int error;
+
+#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
+    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
+	/*
+	 * POSIX shared-memory objects are defined to have
+	 * kernel persistence, and are not defined to support
+	 * read(2)/write(2) -- or even open(2).  Thus, we can
+	 * use MAP_ASYNC to trade on-disk coherence for speed.
+	 * The shm_open(3) library routine turns on the FPOSIXSHM
+	 * flag to request this behavior.
+	 */
+	if ((fp->f_flag & FPOSIXSHM) != 0)
+		flags |= MAP_NOSYNC;
+#endif
+	vp = fp->f_vnode;
+
+	/*
+	 * Ensure that file and memory protections are
+	 * compatible.  Note that we only worry about
+	 * writability if mapping is shared; in this case,
+	 * current and max prot are dictated by the open file.
+	 * XXX use the vnode instead?  Problem is: what
+	 * credentials do we use for determination? What if
+	 * proc does a setuid?
+	 */
+	mp = vp->v_mount;
+	if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0)
+		maxprot = VM_PROT_NONE;
+	else
+		maxprot = VM_PROT_EXECUTE;
+	if ((fp->f_flag & FREAD) != 0)
+		maxprot |= VM_PROT_READ;
+	else if ((prot & VM_PROT_READ) != 0)
+		return (EACCES);
+
+	/*
+	 * If we are sharing potential changes via MAP_SHARED and we
+	 * are trying to get write permission although we opened it
+	 * without asking for it, bail out.
+	 */
+	if ((flags & MAP_SHARED) != 0) {
+		if ((fp->f_flag & FWRITE) != 0)
+			maxprot |= VM_PROT_WRITE;
+		else if ((prot & VM_PROT_WRITE) != 0)
+			return (EACCES);
+	} else {
+		maxprot |= VM_PROT_WRITE;
+		cap_maxprot |= VM_PROT_WRITE;
+	}
+	maxprot &= cap_maxprot;
+
+	writecounted = FALSE;
+	error = vm_mmap_vnode(td, size, prot, &maxprot, &flags, vp,
+	    &foff, &object, &writecounted);
+	if (error != 0)
+		return (error);
+	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
+	    foff, writecounted, td);
+	if (error != 0) {
+		/*
+		 * If this mapping was accounted for in the vnode's
+		 * writecount, then undo that now.
+		 */
+		if (writecounted)
+			vnode_pager_release_writecount(object, 0, size);
+		vm_object_deallocate(object);
+	}
+#ifdef HWPMC_HOOKS
+	/* Inform hwpmc(4) if an executable is being mapped. */
+	if (error == 0 && (prot & VM_PROT_EXECUTE) != 0) {
+		pkm.pm_file = vp;
+		pkm.pm_address = (uintptr_t) addr;
+		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
+	}
+#endif
+	return (error);
+}

Modified: head/sys/sys/file.h
==============================================================================
--- head/sys/sys/file.h	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/sys/file.h	Thu Jun  4 19:41:15 2015	(r283998)
@@ -42,6 +42,7 @@
 #include <sys/refcount.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
+#include <vm/vm.h>
 
 struct filedesc;
 struct stat;
@@ -115,6 +116,9 @@ typedef int fo_seek_t(struct file *fp, o
 		    struct thread *td);
 typedef int fo_fill_kinfo_t(struct file *fp, struct kinfo_file *kif,
 		    struct filedesc *fdp);
+typedef int fo_mmap_t(struct file *fp, vm_map_t map, vm_offset_t *addr,
+		    vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot,
+		    int flags, vm_ooffset_t foff, struct thread *td);
 typedef	int fo_flags_t;
 
 struct fileops {
@@ -131,6 +135,7 @@ struct fileops {
 	fo_sendfile_t	*fo_sendfile;
 	fo_seek_t	*fo_seek;
 	fo_fill_kinfo_t	*fo_fill_kinfo;
+	fo_mmap_t	*fo_mmap;
 	fo_flags_t	fo_flags;	/* DFLAG_* below */
 };
 
@@ -391,6 +396,18 @@ fo_fill_kinfo(struct file *fp, struct ki
 	return ((*fp->f_ops->fo_fill_kinfo)(fp, kif, fdp));
 }
 
+static __inline int
+fo_mmap(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size,
+    vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff,
+    struct thread *td)
+{
+
+	if (fp->f_ops->fo_mmap == NULL)
+		return (ENODEV);
+	return ((*fp->f_ops->fo_mmap)(fp, map, addr, size, prot, cap_maxprot,
+	    flags, foff, td));
+}
+
 #endif /* _KERNEL */
 
 #endif /* !SYS_FILE_H */

Modified: head/sys/sys/mman.h
==============================================================================
--- head/sys/sys/mman.h	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/sys/mman.h	Thu Jun  4 19:41:15 2015	(r283998)
@@ -230,8 +230,6 @@ struct shmfd {
 #endif
 
 #ifdef _KERNEL
-int	shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
-	    vm_object_t *obj);
 int	shm_map(struct file *fp, size_t size, off_t offset, void **memp);
 int	shm_unmap(struct file *fp, void *mem, size_t size);
 

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/vm/vm_extern.h	Thu Jun  4 19:41:15 2015	(r283998)
@@ -40,6 +40,8 @@ struct vnode;
 struct vmem;
 
 #ifdef _KERNEL
+struct cdev;
+struct cdevsw;
 
 /* These operate on kernel virtual addresses only. */
 vm_offset_t kva_alloc(vm_size_t);
@@ -81,10 +83,18 @@ int vm_fault_hold(vm_map_t map, vm_offse
     int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
-int vm_forkproc(struct thread *, struct proc *, struct thread *, struct vmspace *, int);
+int vm_forkproc(struct thread *, struct proc *, struct thread *,
+    struct vmspace *, int);
 void vm_waitproc(struct proc *);
-int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int, objtype_t, void *, vm_ooffset_t);
+int vm_mmap(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t, vm_prot_t, int,
+    objtype_t, void *, vm_ooffset_t);
+int vm_mmap_object(vm_map_t, vm_offset_t *, vm_size_t, vm_prot_t,
+    vm_prot_t, int, vm_object_t, vm_ooffset_t, boolean_t, struct thread *);
 int vm_mmap_to_errno(int rv);
+int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
+    int *, struct cdev *, struct cdevsw *, vm_ooffset_t *, vm_object_t *);
+int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *, int *,
+    struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
 void vm_set_page_size(void);
 void vm_sync_icache(vm_map_t, vm_offset_t, vm_size_t);
 typedef int (*pmap_pinit_t)(struct pmap *pmap);

Modified: head/sys/vm/vm_mmap.c
==============================================================================
--- head/sys/vm/vm_mmap.c	Thu Jun  4 19:18:58 2015	(r283997)
+++ head/sys/vm/vm_mmap.c	Thu Jun  4 19:41:15 2015	(r283998)
@@ -100,13 +100,6 @@ SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTL
 #define	MAP_32BIT_MAX_ADDR	((vm_offset_t)1 << 31)
 #endif
 
-static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
-    int *, struct vnode *, vm_ooffset_t *, vm_object_t *, boolean_t *);
-static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
-    int *, struct cdev *, vm_ooffset_t *, vm_object_t *);
-static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
-    int *, struct shmfd *, vm_ooffset_t, vm_object_t *);
-
 #ifndef _SYS_SYSPROTO_H_
 struct sbrk_args {
 	int incr;
@@ -197,16 +190,10 @@ sys_mmap(td, uap)
 	struct thread *td;
 	struct mmap_args *uap;
 {
-#ifdef HWPMC_HOOKS
-	struct pmckern_map_in pkm;
-#endif
 	struct file *fp;
-	struct vnode *vp;
 	vm_offset_t addr;
 	vm_size_t size, pageoff;
-	vm_prot_t cap_maxprot, maxprot;
-	void *handle;
-	objtype_t handle_type;
+	vm_prot_t cap_maxprot;
 	int align, error, flags, prot;
 	off_t pos;
 	struct vmspace *vms = td->td_proc->p_vmspace;
@@ -334,14 +321,22 @@ sys_mmap(td, uap)
 			    lim_max(td->td_proc, RLIMIT_DATA));
 		PROC_UNLOCK(td->td_proc);
 	}
-	if (flags & MAP_ANON) {
+	if (size == 0) {
+		/*
+		 * Return success without mapping anything for old
+		 * binaries that request a page-aligned mapping of
+		 * length 0.  For modern binaries, this function
+		 * returns an error earlier.
+		 */
+		error = 0;
+	} else if (flags & MAP_ANON) {
 		/*
 		 * Mapping blank space is trivial.
+		 *
+		 * This relies on VM_PROT_* matching PROT_*.
 		 */
-		handle = NULL;
-		handle_type = OBJT_DEFAULT;
-		maxprot = VM_PROT_ALL;
-		cap_maxprot = VM_PROT_ALL;
+		error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
+		    VM_PROT_ALL, flags, NULL, pos, FALSE, td);
 	} else {
 		/*
 		 * Mapping file, get fp for validation and don't let the
@@ -366,93 +361,12 @@ sys_mmap(td, uap)
 			error = EINVAL;
 			goto done;
 		}
-		if (fp->f_type == DTYPE_SHM) {
-			handle = fp->f_data;
-			handle_type = OBJT_SWAP;
-			maxprot = VM_PROT_NONE;
-
-			/* FREAD should always be set. */
-			if (fp->f_flag & FREAD)
-				maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
-			if (fp->f_flag & FWRITE)
-				maxprot |= VM_PROT_WRITE;
-			goto map;
-		}
-		if (fp->f_type != DTYPE_VNODE) {
-			error = ENODEV;
-			goto done;
-		}
-#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
-    defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
-		/*
-		 * POSIX shared-memory objects are defined to have
-		 * kernel persistence, and are not defined to support
-		 * read(2)/write(2) -- or even open(2).  Thus, we can
-		 * use MAP_ASYNC to trade on-disk coherence for speed.
-		 * The shm_open(3) library routine turns on the FPOSIXSHM
-		 * flag to request this behavior.
-		 */
-		if (fp->f_flag & FPOSIXSHM)
-			flags |= MAP_NOSYNC;
-#endif
-		vp = fp->f_vnode;
-		/*
-		 * Ensure that file and memory protections are
-		 * compatible.  Note that we only worry about
-		 * writability if mapping is shared; in this case,
-		 * current and max prot are dictated by the open file.
-		 * XXX use the vnode instead?  Problem is: what
-		 * credentials do we use for determination? What if
-		 * proc does a setuid?
-		 */
-		if (vp->v_mount != NULL && vp->v_mount->mnt_flag & MNT_NOEXEC)
-			maxprot = VM_PROT_NONE;
-		else
-			maxprot = VM_PROT_EXECUTE;
-		if (fp->f_flag & FREAD) {
-			maxprot |= VM_PROT_READ;
-		} else if (prot & PROT_READ) {
-			error = EACCES;
-			goto done;
-		}
-		/*
-		 * If we are sharing potential changes (either via
-		 * MAP_SHARED or via the implicit sharing of character
-		 * device mappings), and we are trying to get write
-		 * permission although we opened it without asking
-		 * for it, bail out.
-		 */
-		if ((flags & MAP_SHARED) != 0) {
-			if ((fp->f_flag & FWRITE) != 0) {
-				maxprot |= VM_PROT_WRITE;
-			} else if ((prot & PROT_WRITE) != 0) {
-				error = EACCES;
-				goto done;
-			}
-		} else if (vp->v_type != VCHR || (fp->f_flag & FWRITE) != 0) {
-			maxprot |= VM_PROT_WRITE;
-			cap_maxprot |= VM_PROT_WRITE;
-		}
-		handle = (void *)vp;
-		handle_type = OBJT_VNODE;
-	}
-map:
-	td->td_fpop = fp;
-	maxprot &= cap_maxprot;
 
-	/* This relies on VM_PROT_* matching PROT_*. */
-	error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
-	    flags, handle_type, handle, pos);
-	td->td_fpop = NULL;
-#ifdef HWPMC_HOOKS
-	/* inform hwpmc(4) if an executable is being mapped */
-	if (error == 0 && handle_type == OBJT_VNODE &&
-	    (prot & PROT_EXEC)) {
-		pkm.pm_file = handle;
-		pkm.pm_address = (uintptr_t) addr;
-		PMC_CALL_HOOK(td, PMC_FN_MMAP, (void *) &pkm);
+		/* This relies on VM_PROT_* matching PROT_*. */
+		error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
+		    cap_maxprot, flags, pos, td);
 	}
-#endif
+
 	if (error == 0)
 		td->td_retval[0] = (register_t) (addr + pageoff);
 done:
@@ -1311,9 +1225,6 @@ sys_munlock(td, uap)
  *
  * Helper function for vm_mmap.  Perform sanity check specific for mmap
  * operations on vnodes.
- *
- * For VCHR vnodes, the vnode lock is held over the call to
- * vm_mmap_cdev() to keep vp->v_rdev valid.
  */
 int
 vm_mmap_vnode(struct thread *td, vm_size_t objsize,
@@ -1360,12 +1271,6 @@ vm_mmap_vnode(struct thread *td, vm_size
 			*writecounted = TRUE;
 			vnode_pager_update_writecount(obj, 0, objsize);
 		}
-	} else if (vp->v_type == VCHR) {
-		error = vm_mmap_cdev(td, objsize, prot, maxprotp, flagsp,
-		    vp->v_rdev, foffp, objp);
-		if (error == 0)
-			goto mark_atime;
-		goto done;
 	} else {
 		error = EINVAL;
 		goto done;
@@ -1373,13 +1278,14 @@ vm_mmap_vnode(struct thread *td, vm_size
 	if ((error = VOP_GETATTR(vp, &va, cred)))
 		goto done;
 #ifdef MAC
-	error = mac_vnode_check_mmap(cred, vp, prot, flags);
+	/* This relies on VM_PROT_* matching PROT_*. */
+	error = mac_vnode_check_mmap(cred, vp, (int)prot, flags);
 	if (error != 0)
 		goto done;
 #endif
 	if ((flags & MAP_SHARED) != 0) {
 		if ((va.va_flags & (SF_SNAPSHOT|IMMUTABLE|APPEND)) != 0) {
-			if (prot & PROT_WRITE) {
+			if (prot & VM_PROT_WRITE) {
 				error = EPERM;
 				goto done;
 			}
@@ -1414,7 +1320,6 @@ vm_mmap_vnode(struct thread *td, vm_size
 	*objp = obj;
 	*flagsp = flags;
 
-mark_atime:
 	vfs_mark_atime(vp, cred);
 
 done:
@@ -1435,21 +1340,18 @@ done:
  * operations on cdevs.
  */
 int
-vm_mmap_cdev(struct thread *td, vm_size_t objsize,
-    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
-    struct cdev *cdev, vm_ooffset_t *foff, vm_object_t *objp)
+vm_mmap_cdev(struct thread *td, vm_size_t objsize, vm_prot_t prot,
+    vm_prot_t *maxprotp, int *flagsp, struct cdev *cdev, struct cdevsw *dsw,
+    vm_ooffset_t *foff, vm_object_t *objp)
 {
 	vm_object_t obj;
-	struct cdevsw *dsw;
-	int error, flags, ref;
+	int error, flags;
 
 	flags = *flagsp;
 
-	dsw = dev_refthread(cdev, &ref);
-	if (dsw == NULL)
-		return (ENXIO);
 	if (dsw->d_flags & D_MMAP_ANON) {
-		dev_relthread(cdev, ref);
+		*objp = NULL;
+		*foff = 0;
 		*maxprotp = VM_PROT_ALL;
 		*flagsp |= MAP_ANON;
 		return (0);
@@ -1458,24 +1360,18 @@ vm_mmap_cdev(struct thread *td, vm_size_
 	 * cdevs do not provide private mappings of any kind.
 	 */
 	if ((*maxprotp & VM_PROT_WRITE) == 0 &&
-	    (prot & PROT_WRITE) != 0) {
-		dev_relthread(cdev, ref);
+	    (prot & VM_PROT_WRITE) != 0)
 		return (EACCES);
-	}
-	if (flags & (MAP_PRIVATE|MAP_COPY)) {
-		dev_relthread(cdev, ref);
+	if (flags & (MAP_PRIVATE|MAP_COPY))
 		return (EINVAL);
-	}
 	/*
 	 * Force device mappings to be shared.
 	 */
 	flags |= MAP_SHARED;
 #ifdef MAC_XXX
-	error = mac_cdev_check_mmap(td->td_ucred, cdev, prot);
-	if (error != 0) {
-		dev_relthread(cdev, ref);
+	error = mac_cdev_check_mmap(td->td_ucred, cdev, (int)prot);
+	if (error != 0)
 		return (error);
-	}
 #endif
 	/*
 	 * First, try d_mmap_single().  If that is not implemented
@@ -1487,7 +1383,6 @@ vm_mmap_cdev(struct thread *td, vm_size_
 	 * XXX assumes VM_PROT_* == PROT_*
 	 */
 	error = dsw->d_mmap_single(cdev, foff, objsize, objp, (int)prot);
-	dev_relthread(cdev, ref);
 	if (error != ENODEV)
 		return (error);
 	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
@@ -1500,59 +1395,89 @@ vm_mmap_cdev(struct thread *td, vm_size_
 }
 
 /*
- * vm_mmap_shm()
- *
- * MPSAFE
+ * vm_mmap()
  *
- * Helper function for vm_mmap.  Perform sanity check specific for mmap
- * operations on shm file descriptors.
+ * Internal version of mmap used by exec, sys5 shared memory, and
+ * various device drivers.  Handle is either a vnode pointer, a
+ * character device, or NULL for MAP_ANON.
  */
 int
-vm_mmap_shm(struct thread *td, vm_size_t objsize,
-    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
-    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
+vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
+	vm_prot_t maxprot, int flags,
+	objtype_t handle_type, void *handle,
+	vm_ooffset_t foff)
 {
+	vm_object_t object;
+	struct thread *td = curthread;
 	int error;
+	boolean_t writecounted;
 
-	if ((*flagsp & MAP_SHARED) != 0 &&
-	    (*maxprotp & VM_PROT_WRITE) == 0 &&
-	    (prot & PROT_WRITE) != 0)
-		return (EACCES);
-#ifdef MAC
-	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
-	if (error != 0)
-		return (error);
-#endif
-	error = shm_mmap(shmfd, objsize, foff, objp);
+	if (size == 0)
+		return (EINVAL);
+
+	size = round_page(size);
+	writecounted = FALSE;
+
+	/*
+	 * Lookup/allocate object.
+	 */
+	switch (handle_type) {
+	case OBJT_DEVICE: {
+		struct cdevsw *dsw;
+		struct cdev *cdev;
+		int ref;
+
+		cdev = handle;
+		dsw = dev_refthread(cdev, &ref);
+		if (dsw == NULL)
+			return (ENXIO);
+		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, cdev,
+		    dsw, &foff, &object);
+		dev_relthread(cdev, ref);
+		break;
+	}
+	case OBJT_VNODE:
+		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
+		    handle, &foff, &object, &writecounted);
+		break;
+	case OBJT_DEFAULT:
+		if (handle == NULL) {
+			error = 0;
+			break;
+		}
+		/* FALLTHROUGH */
+	default:
+		error = EINVAL;
+		break;
+	}
 	if (error)
 		return (error);
-	return (0);
+
+	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
+	    foff, writecounted, td);
+	if (error != 0 && object != NULL) {
+		/*
+		 * If this mapping was accounted for in the vnode's
+		 * writecount, then undo that now.
+		 */
+		if (writecounted)
+			vnode_pager_release_writecount(object, 0, size);
+		vm_object_deallocate(object);
+	}
+	return (error);
 }
 
 /*
- * vm_mmap()
- *
- * MPSAFE
- *
- * Internal version of mmap.  Currently used by mmap, exec, and sys5
- * shared memory.  Handle is either a vnode pointer or NULL for MAP_ANON.
+ * Internal version of mmap that maps a specific VM object into an
+ * map.  Called by mmap for MAP_ANON, vm_mmap, shm_mmap, and vn_mmap.
  */
 int
-vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
-	vm_prot_t maxprot, int flags,
-	objtype_t handle_type, void *handle,
-	vm_ooffset_t foff)
+vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
+    vm_prot_t maxprot, int flags, vm_object_t object, vm_ooffset_t foff,
+    boolean_t writecounted, struct thread *td)
 {
 	boolean_t fitit;
-	vm_object_t object = NULL;
-	struct thread *td = curthread;
 	int docow, error, findspace, rv;
-	boolean_t writecounted;
-
-	if (size == 0)
-		return (0);
-
-	size = round_page(size);
 
 	if (map == &td->td_proc->p_vmspace->vm_map) {
 		PROC_LOCK(td->td_proc);
@@ -1586,11 +1511,11 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
 
 	/*
 	 * We currently can only deal with page aligned file offsets.
-	 * The check is here rather than in the syscall because the
-	 * kernel calls this function internally for other mmaping
-	 * operations (such as in exec) and non-aligned offsets will
-	 * cause pmap inconsistencies...so we want to be sure to
-	 * disallow this in all cases.
+	 * The mmap() system call already enforces this by subtracting
+	 * the page offset from the file offset, but checking here
+	 * catches errors in device drivers (e.g. d_single_mmap()
+	 * callbacks) and other internal mapping requests (such as in
+	 * exec).
 	 */
 	if (foff & PAGE_MASK)
 		return (EINVAL);
@@ -1603,44 +1528,11 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
 			return (EINVAL);
 		fitit = FALSE;
 	}
-	writecounted = FALSE;
 
-	/*
-	 * Lookup/allocate object.
-	 */
-	switch (handle_type) {
-	case OBJT_DEVICE:
-		error = vm_mmap_cdev(td, size, prot, &maxprot, &flags,
-		    handle, &foff, &object);
-		break;
-	case OBJT_VNODE:
-		error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
-		    handle, &foff, &object, &writecounted);
-		break;
-	case OBJT_SWAP:
-		error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
-		    handle, foff, &object);
-		break;
-	case OBJT_DEFAULT:
-		if (handle == NULL) {
-			error = 0;
-			break;
-		}
-		/* FALLTHROUGH */
-	default:
-		error = EINVAL;
-		break;
-	}
-	if (error)
-		return (error);
 	if (flags & MAP_ANON) {
-		object = NULL;
+		if (object != NULL || foff != 0)
+			return (EINVAL);
 		docow = 0;
-		/*
-		 * Unnamed anonymous regions always start at 0.
-		 */
-		if (handle == 0)
-			foff = 0;
 	} else if (flags & MAP_PREFAULT_READ)
 		docow = MAP_PREFAULT;
 	else
@@ -1693,19 +1585,6 @@ vm_mmap(vm_map_t map, vm_offset_t *addr,
 			    VM_MAP_WIRE_USER | ((flags & MAP_STACK) ?
 			    VM_MAP_WIRE_HOLESOK : VM_MAP_WIRE_NOHOLES));
 		}
-	} else {
-		/*
-		 * If this mapping was accounted for in the vnode's
-		 * writecount, then undo that now.
-		 */
-		if (writecounted)
-			vnode_pager_release_writecount(object, 0, size);
-		/*
-		 * Lose the object reference.  Will destroy the
-		 * object if it's an unnamed anonymous mapping
-		 * or named anonymous without other references.
-		 */
-		vm_object_deallocate(object);
 	}
 	return (vm_mmap_to_errno(rv));
 }


