Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 27 Jul 1998 13:13:55 -0400 (EDT)
From:      Luoqi Chen <luoqi@chen.ml.org>
To:        FreeBSD-gnats-submit@FreeBSD.ORG
Subject:   kern/7415: VMIO bug for FS with sub-page-size blocks (with fix)
Message-ID:  <199807271713.NAA07229@chen.ml.org>

index | next in thread | raw e-mail


>Number:         7415
>Category:       kern
>Synopsis:       VMIO bug for FS with sub-page-size blocks
>Confidential:   no
>Severity:       serious
>Priority:       high
>Responsible:    freebsd-bugs
>State:          open
>Quarter:
>Keywords:
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Mon Jul 27 10:20:01 PDT 1998
>Last-Modified:
>Originator:     Luoqi Chen
>Organization:
>Release:        FreeBSD 3.0-CURRENT i386
>Environment:

	3.0-current as of 7/27/98

>Description:

	There are some bugs in VMIO code when dealing with blocks not
	on a page-boundary. They may cause crash for FS with sub-page
	sized blocks (e.g. MSDOSFS), or VMIO enabled block devices
	(which uses 512-byte blocks).

>How-To-Repeat:

	First, create an file of size ~40K,
		dd if=/dev/zero of=/tmp/image bs=4096 count=10240
	config a vn device on top it,
		vnconfig -c /dev/vn0c /dev/zero
	create an msdos fs,
		newfs_msdsos /dev/rvn0c
	Now, try to mount it as an *ffs*,
		mount /dev/vn0c /dos
	the mount would fail, but it has the side effect of enabling
	VMIO on /dev/vn0c.

	Last, mount as an msdosfs,
		mount_msdos /dev/vn0c /dos
	and you will get a panic about "fault on nofault entry".

>Fix:
	
	The following patch should fix the problem. I hope this one
	won't silently rot away as some other patches I submitted,
	this is (IMO) a quite serious problem, and took me quite some
	time to figure it out.

Index: vfs_bio.c
===================================================================
RCS file: /fun/cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.167
diff -u -r1.167 vfs_bio.c
--- vfs_bio.c	1998/07/13 07:05:55	1.167
+++ vfs_bio.c	1998/07/24 18:24:18
@@ -1325,6 +1325,7 @@
 		if (vm_page_is_valid(m,
 		    (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
 			return 0;
+		tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
 	}
 	return 1;
 }
@@ -1367,9 +1368,9 @@
 				break;
 			}
 		}
-		boffset = (i << PAGE_SHIFT);
+		boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
 		if (boffset < bp->b_dirtyoff) {
-			bp->b_dirtyoff = boffset;
+			bp->b_dirtyoff = max(boffset, 0);
 		}
 
 		/*
@@ -1381,11 +1382,14 @@
 			}
 		}
 		boffset = (i + 1);
+#if 0
 		offset = boffset + bp->b_pages[0]->pindex;
 		if (offset >= object->size)
 			boffset = object->size - bp->b_pages[0]->pindex;
-		if (bp->b_dirtyend < (boffset << PAGE_SHIFT))
-			bp->b_dirtyend = (boffset << PAGE_SHIFT);
+#endif
+		boffset = (boffset << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
+		if (bp->b_dirtyend < boffset)
+			bp->b_dirtyend = min(boffset, bp->b_bufsize);
 	}
 }
 
@@ -1398,21 +1402,9 @@
 	struct buf *bp;
 	int i, s;
 	struct bufhashhdr *bh;
-	int maxsize;
 	int generation;
 	int checksize;
 
-	if (vp->v_mount) {
-		maxsize = vp->v_mount->mnt_stat.f_iosize;
-		/*
-		 * This happens on mount points.
-		 */
-		if (maxsize < size)
-			maxsize = size;
-	} else {
-		maxsize = size;
-	}
-
 #if !defined(MAX_PERF)
 	if (size > MAXBSIZE)
 		panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE);
@@ -1503,7 +1495,22 @@
 		splx(s);
 		return (bp);
 	} else {
-		vm_object_t obj;
+		int bsize, maxsize, vmio;
+		off_t offset;
+
+		if (vp->v_type == VBLK)
+			bsize = DEV_BSIZE;
+		else if (vp->v_mountedhere)
+			bsize = vp->v_mountedhere->mnt_stat.f_iosize;
+		else if (vp->v_mount)
+			bsize = vp->v_mount->mnt_stat.f_iosize;
+		else
+			bsize = size;
+
+		offset = (off_t)blkno * bsize;
+		vmio = (vp->v_object != 0) && (vp->v_flag & VOBJBUF);
+		maxsize = vmio ? size + (offset & PAGE_MASK) : size;
+		maxsize = imax(maxsize, bsize);
 
 		if ((bp = getnewbuf(vp, blkno,
 			slpflag, slptimeo, size, maxsize)) == 0) {
@@ -1531,18 +1538,14 @@
 		 * be found by incore.
 		 */
 		bp->b_blkno = bp->b_lblkno = blkno;
-
-		if (vp->v_type != VBLK)
-			bp->b_offset = (off_t) blkno * maxsize;
-		else
-			bp->b_offset = (off_t) blkno * DEV_BSIZE;
+		bp->b_offset = offset;
 
 		bgetvp(vp, bp);
 		LIST_REMOVE(bp, b_hash);
 		bh = BUFHASH(vp, blkno);
 		LIST_INSERT_HEAD(bh, bp, b_hash);
 
-		if ((obj = vp->v_object) && (vp->v_flag & VOBJBUF)) {
+		if (vmio) {
 			bp->b_flags |= (B_VMIO | B_CACHE);
 #if defined(VFS_BIO_DEBUG)
 			if (vp->v_type != VREG && vp->v_type != VBLK)
@@ -1695,7 +1698,8 @@
 		int desiredpages;
 
 		newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
-		desiredpages = (round_page(newbsize) >> PAGE_SHIFT);
+		desiredpages = size == 0 ? 0 :
+			num_pages((bp->b_offset & PAGE_MASK) + newbsize);
 
 #if !defined(NO_B_MALLOC)
 		if (bp->b_flags & B_MALLOC)
@@ -1744,8 +1748,6 @@
 			if (bp->b_npages < desiredpages) {
 				obj = vp->v_object;
 				tinc = PAGE_SIZE;
-				if (tinc > bsize)
-					tinc = bsize;
 
 				off = bp->b_offset;
 #ifdef DIAGNOSTIC
@@ -1759,10 +1761,9 @@
 				bp->b_validend = orig_validend;
 				bp->b_flags |= B_CACHE;
 				for (toff = 0; toff < newbsize; toff += tinc) {
-					int bytesinpage;
-
-					pageindex = toff >> PAGE_SHIFT;
 					objoff = OFF_TO_IDX(off + toff);
+					pageindex = objoff - OFF_TO_IDX(off);
+					tinc = PAGE_SIZE - ((off + toff) & PAGE_MASK);
 					if (pageindex < curbpnpages) {
 
 						m = bp->b_pages[pageindex];
@@ -1770,11 +1771,10 @@
 						if (m->pindex != objoff)
 							panic("allocbuf: page changed offset??!!!?");
 #endif
-						bytesinpage = tinc;
 						if (tinc > (newbsize - toff))
-							bytesinpage = newbsize - toff;
+							tinc = newbsize - toff;
 						if (bp->b_flags & B_CACHE)
-							vfs_buf_set_valid(bp, off, toff, bytesinpage, m);
+							vfs_buf_set_valid(bp, off, toff, tinc, m);
 						continue;
 					}
 					m = vm_page_lookup(obj, objoff);
@@ -1782,7 +1782,7 @@
 						m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
 						if (!m) {
 							VM_WAIT;
-							vm_pageout_deficit += (desiredpages - bp->b_npages);
+							vm_pageout_deficit += (desiredpages - curbpnpages);
 							goto doretry;
 						}
 
@@ -1805,11 +1805,10 @@
 								(cnt.v_free_min + cnt.v_cache_min))) {
 							pagedaemon_wakeup();
 						}
-						bytesinpage = tinc;
 						if (tinc > (newbsize - toff))
-							bytesinpage = newbsize - toff;
+							tinc = newbsize - toff;
 						if (bp->b_flags & B_CACHE)
-							vfs_buf_set_valid(bp, off, toff, bytesinpage, m);
+							vfs_buf_set_valid(bp, off, toff, tinc, m);
 						m->flags &= ~PG_ZERO;
 						vm_page_wire(m);
 					}
@@ -2154,7 +2153,7 @@
 		 * This only bothers with the first valid range in the
 		 * page.
 		 */
-		svalid = off;
+		svalid = ((foff + off) & ~PAGE_MASK) - foff;
 		while (validbits && !(validbits & 1)) {
 			svalid += DEV_BSIZE;
 			validbits >>= 1;
@@ -2164,6 +2163,7 @@
 			evalid += DEV_BSIZE;
 			validbits >>= 1;
 		}
+		evalid = min(evalid, off + size);
 		/*
 		 * Make sure this range is contiguous with the range
 		 * built up from previous pages.  If not, then we will
@@ -2192,15 +2192,14 @@
 	vm_ooffset_t soff, eoff;
 
 	soff = off;
-	eoff = off + min(PAGE_SIZE, bp->b_bufsize);
+	eoff = qmin(off + PAGE_SIZE, bp->b_offset + bp->b_bufsize);
 	if (vp->v_tag == VT_NFS && vp->v_type != VBLK) {
 		vm_ooffset_t sv, ev;
 		vm_page_set_invalid(m,
 		    (vm_offset_t) (soff & PAGE_MASK),
 		    (vm_offset_t) (eoff - soff));
-		off = off - pageno * PAGE_SIZE;
-		sv = off + ((bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1));
-		ev = off + ((bp->b_validend + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1));
+		sv = (bp->b_offset + bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+		ev = (bp->b_offset + bp->b_validend) & ~(DEV_BSIZE - 1);
 		soff = qmax(sv, soff);
 		eoff = qmin(ev, eoff);
 	}
@@ -2285,18 +2284,21 @@
 			panic("vfs_clean_pages: no buffer offset");
 #endif
 
-		for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) {
+		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 			vfs_page_set_valid(bp, foff, i, m);
+			foff = (foff + PAGE_SIZE) & ~PAGE_MASK;
 		}
 	}
 }
 
 void
 vfs_bio_clrbuf(struct buf *bp) {
-	int i;
+	int i, size;
+	caddr_t sa, ea;
 	if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
-		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) {
+		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
+		    (bp->b_offset & PAGE_MASK) == 0) {
 			int mask;
 			mask = 0;
 			for(i=0;i<bp->b_bufsize;i+=DEV_BSIZE)
@@ -2309,19 +2311,23 @@
 			bp->b_resid = 0;
 			return;
 		}
-		for(i=0;i<bp->b_npages;i++) {
+		ea = sa = bp->b_data;
+		for(i=0;i<bp->b_npages;i++,sa=ea) {
+			ea = (caddr_t)trunc_page(sa + PAGE_SIZE);
+			ea = (caddr_t)ulmin((u_long)ea,
+				(u_long)bp->b_data + bp->b_bufsize);
 			if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL)
 				continue;
 			if( bp->b_pages[i]->valid == 0) {
 				if ((bp->b_pages[i]->flags & PG_ZERO) == 0) {
-					bzero(bp->b_data + (i << PAGE_SHIFT), PAGE_SIZE);
+					bzero(sa, ea - sa);
 				}
 			} else {
-				int j;
-				for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) {
+				int j = ((u_long)sa & PAGE_MASK) / DEV_BSIZE;
+				for (; sa < ea; sa += DEV_BSIZE, j++) {
 					if (((bp->b_pages[i]->flags & PG_ZERO) == 0) &&
 						(bp->b_pages[i]->valid & (1<<j)) == 0)
-						bzero(bp->b_data + (i << PAGE_SHIFT) + j * DEV_BSIZE, DEV_BSIZE);
+						bzero(sa, DEV_BSIZE);
 				}
 			}
 			bp->b_pages[i]->valid = VM_PAGE_BITS_ALL;
>Audit-Trail:
>Unformatted:

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-bugs" in the body of the message


help

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199807271713.NAA07229>