Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 15 Jan 2003 18:25:14 -0800 (PST)
From:      Matthew Dillon <dillon@apollo.backplane.com>
To:        scsi@freebsd.org, gibbs@freebsd.org
Cc:        Tor.Egge@cvsup.no.freebsd.org, "Alan L. Cox" <alc@imimic.com>
Subject:   vmapbuf() changes effect CAM (patch)
Message-ID:  <200301160225.h0G2PE21019224@apollo.backplane.com>

next in thread | raw e-mail | index | archive | help
    Hi SCSI guys.  I would appreciate it if you could double check that the
    CAM portion of this patch correctly backs out the I/O operation when
    vmapbuf() fails.  I would like to commit it ASAP and you have a 
    MAINTAINERS entry for CAM.

    Alan, Tor, if you could review the whole patch I would appreciate it.
    Note that this is an incremental patch, it fixes the vmapbuf() race but
    does not yet clean up the useracc() checks (to just check that the
    address range is within the user VM space rather then also checking 
    the map entries).

					Thanks,

					Matthew Dillon 
					<dillon@backplane.com>


Index: cam/cam_periph.c
===================================================================
RCS file: /home/ncvs/src/sys/cam/cam_periph.c,v
retrieving revision 1.43
diff -u -r1.43 cam_periph.c
--- cam/cam_periph.c	14 Nov 2002 05:35:57 -0000	1.43
+++ cam/cam_periph.c	16 Jan 2003 00:20:06 -0000
@@ -534,7 +534,7 @@
 int
 cam_periph_mapmem(union ccb *ccb, struct cam_periph_map_info *mapinfo)
 {
-	int numbufs, i;
+	int numbufs, i, j;
 	int flags[CAM_PERIPH_MAXMAPS];
 	u_int8_t **data_ptrs[CAM_PERIPH_MAXMAPS];
 	u_int32_t lengths[CAM_PERIPH_MAXMAPS];
@@ -659,8 +659,28 @@
 		/* set the direction */
 		mapinfo->bp[i]->b_iocmd = flags[i];
 
-		/* map the buffer into kernel memory */
-		vmapbuf(mapinfo->bp[i]);
+		/*
+		 * Map the buffer into kernel memory.
+		 *
+		 * Note that useracc() alone is not a  sufficient test.
+		 * vmapbuf() can still fail due to a smaller file mapped
+		 * into a larger area of VM, or if userland races against
+		 * vmapbuf() after the useracc() check.
+		 */
+		if (vmapbuf(mapinfo->bp[i]) < 0) {
+			printf("cam_periph_mapmem: error, "
+				"address %p, length %lu isn't "
+				"user accessible any more\n",
+				(void *)*data_ptrs[i],
+				(u_long)lengths[i]);
+			for (j = 0; j < i; ++j) {
+				*data_ptrs[j] = mapinfo->bp[j]->b_saveaddr;
+				mapinfo->bp[j]->b_flags &= ~B_PHYS;
+				relpbuf(mapinfo->bp[j], NULL);
+			}
+			PRELE(curproc);
+			return(EACCES);
+		}
 
 		/* set our pointer to the new mapped area */
 		*data_ptrs[i] = mapinfo->bp[i]->b_data;
Index: kern/kern_physio.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_physio.c,v
retrieving revision 1.54
diff -u -r1.54 kern_physio.c
--- kern/kern_physio.c	3 Jan 2003 05:57:34 -0000	1.54
+++ kern/kern_physio.c	16 Jan 2003 00:16:39 -0000
@@ -95,13 +95,23 @@
 			bp->b_blkno = btodb(bp->b_offset);
 
 			if (uio->uio_segflg == UIO_USERSPACE) {
+				/*
+				 * Note that useracc() alone is not a
+				 * sufficient test.  vmapbuf() can still fail
+				 * due to a smaller file mapped into a larger
+				 * area of VM, or if userland races against
+				 * vmapbuf() after the useracc() check.
+				 */
 				if (!useracc(bp->b_data, bp->b_bufsize,
 				    bp->b_iocmd == BIO_READ ?
 				    VM_PROT_WRITE : VM_PROT_READ)) {
 					error = EFAULT;
 					goto doerror;
 				}
-				vmapbuf(bp);
+				if (vmapbuf(bp) < 0) {
+					error = EFAULT;
+					goto doerror;
+				}
 			}
 
 			DEV_STRATEGY(bp);
Index: kern/vfs_aio.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_aio.c,v
retrieving revision 1.152
diff -u -r1.152 vfs_aio.c
--- kern/vfs_aio.c	13 Jan 2003 00:28:55 -0000	1.152
+++ kern/vfs_aio.c	16 Jan 2003 00:17:40 -0000
@@ -1123,8 +1123,19 @@
 		}
 	}
 
-	/* Bring buffer into kernel space. */
-	vmapbuf(bp);
+	/*
+	 * Bring buffer into kernel space.
+	 *
+	 * Note that useracc() alone is not a 
+	 * sufficient test.  vmapbuf() can still fail
+	 * due to a smaller file mapped into a larger
+	 * area of VM, or if userland races against
+	 * vmapbuf() after the useracc() check.
+	 */
+	if (vmapbuf(bp) < 0) {
+		error = EFAULT;
+		goto doerror;
+	}
 
 	s = splbio();
 	aiocbe->bp = bp;
Index: kern/vfs_bio.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.355
diff -u -r1.355 vfs_bio.c
--- kern/vfs_bio.c	15 Jan 2003 23:54:34 -0000	1.355
+++ kern/vfs_bio.c	16 Jan 2003 00:12:57 -0000
@@ -3547,13 +3547,18 @@
  * All requests are (re)mapped into kernel VA space.
  * Notice that we use b_bufsize for the size of the buffer
  * to be mapped.  b_bcount might be modified by the driver.
+ *
+ * Note that even if the caller determines that the address space should
+ * be valid, a race or a smaller-file mapped into a larger space may
+ * actually cause vmapbuf() to fail, so all callers of vmapbuf() MUST
+ * check the return value.
  */
-void
+int
 vmapbuf(struct buf *bp)
 {
 	caddr_t addr, kva;
 	vm_offset_t pa;
-	int pidx;
+	int pidx, i;
 	struct vm_page *m;
 	struct pmap *pmap = &curproc->p_vmspace->vm_pmap;
 
@@ -3573,11 +3578,23 @@
 		 * the userland address space, and kextract is only guarenteed
 		 * to work for the kernland address space (see: sparc64 port).
 		 */
-		vm_fault_quick((addr >= bp->b_data) ? addr : bp->b_data,
-			(bp->b_iocmd == BIO_READ)?(VM_PROT_READ|VM_PROT_WRITE):VM_PROT_READ);
+retry:
+		i = vm_fault_quick((addr >= bp->b_data) ? addr : bp->b_data,
+			(bp->b_iocmd == BIO_READ) ?
+			(VM_PROT_READ|VM_PROT_WRITE) : VM_PROT_READ);
+		if (i < 0) {
+			printf("vmapbuf: warning, bad user address during I/O\n");
+			for (i = 0; i < pidx; ++i) {
+				vm_page_unhold(bp->b_pages[i]);
+				bp->b_pages[i] = NULL;
+			}
+			return(-1);
+		}
 		pa = trunc_page(pmap_extract(pmap, (vm_offset_t) addr));
-		if (pa == 0)
-			panic("vmapbuf: page not present");
+		if (pa == 0) {
+			printf("vmapbuf: warning, race against user address during I/O");
+			goto retry;
+		}
 		m = PHYS_TO_VM_PAGE(pa);
 		vm_page_hold(m);
 		bp->b_pages[pidx] = m;
@@ -3590,6 +3607,7 @@
 	bp->b_npages = pidx;
 	bp->b_saveaddr = bp->b_data;
 	bp->b_data = kva + (((vm_offset_t) bp->b_data) & PAGE_MASK);
+	return(0);
 }
 
 /*
Index: sys/buf.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/buf.h,v
retrieving revision 1.139
diff -u -r1.139 buf.h
--- sys/buf.h	3 Jan 2003 06:32:15 -0000	1.139
+++ sys/buf.h	16 Jan 2003 00:09:30 -0000
@@ -499,7 +499,7 @@
 void	vfs_busy_pages(struct buf *, int clear_modify);
 void	vfs_unbusy_pages(struct buf *);
 void	vwakeup(struct buf *);
-void	vmapbuf(struct buf *);
+int	vmapbuf(struct buf *);
 void	vunmapbuf(struct buf *);
 void	relpbuf(struct buf *, int *);
 void	brelvp(struct buf *);
Index: vm/vm_glue.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_glue.c,v
retrieving revision 1.161
diff -u -r1.161 vm_glue.c
--- vm/vm_glue.c	24 Dec 2002 04:24:58 -0000	1.161
+++ vm/vm_glue.c	16 Jan 2003 02:07:33 -0000
@@ -121,6 +121,12 @@
 
 /*
  * MPSAFE
+ *
+ * WARNING!  This code calls vm_map_check_protection() which only checks
+ * the associated vm_map_entry range.  It does not determine whether the
+ * contents of the memory is actually readable or writable.  In most cases
+ * just checking the vm_map_entry is sufficient within the kernel's address
+ * space.
  */
 int
 kernacc(addr, len, rw)
@@ -142,6 +148,12 @@
 
 /*
  * MPSAFE
+ *
+ * WARNING!  This code calls vm_map_check_protection() which only checks
+ * the associated vm_map_entry range.  It does not determine whether the
+ * contents of the memory is actually readable or writable.  vmapbuf(),
+ * vm_fault_quick(), or copyin()/copout()/su*()/fu*() functions should be
+ * used in conjuction with this call.
  */
 int
 useracc(addr, len, rw)
Index: vm/vm_map.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_map.c,v
retrieving revision 1.280
diff -u -r1.280 vm_map.c
--- vm/vm_map.c	13 Jan 2003 23:04:32 -0000	1.280
+++ vm/vm_map.c	16 Jan 2003 02:04:49 -0000
@@ -2179,9 +2179,14 @@
 /*
  *	vm_map_check_protection:
  *
- *	Assert that the target map allows the specified
- *	privilege on the entire address region given.
- *	The entire region must be allocated.
+ *	Assert that the target map allows the specified privilege on the
+ *	entire address region given.  The entire region must be allocated.
+ *
+ *	WARNING!  This code does not and should not check whether the
+ *	contents of the region is accessible.  For example a smaller file
+ *	might be mapped into a larger address space.
+ *
+ *	NOTE!  This code is also called by munmap().
  */
 boolean_t
 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-scsi" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200301160225.h0G2PE21019224>