Skip site navigation (1) | Skip section navigation (2)
Date:      Fri, 24 Jul 1998 05:45:36 -0400 (EDT)
From:      Luoqi Chen <luoqi@watermarkgroup.com>
To:        green@zone.baldcom.net, jkh@time.cdrom.com
Cc:        bright@hotjobs.com, freebsd-current@FreeBSD.ORG, joelh@gnu.org
Subject:   Re: vn subsystem
Message-ID:  <199807240945.FAA20438@lor.watermarkgroup.com>

next in thread | raw e-mail | index | archive | help
> > mount /dev/vn0 /msdos (oops, should this matter anyway?)
> > mount_msdos /dev/vn0 /msdos
> 
> Try it without the bad mount first.  This sounds an awful lot like
> another PR which has nothing to do with vn (e.g. it would do the *same*
> thing if you tried to do this with a non-vn device).
> 
> - Jordan
> 
I took a look at this problem and found some bugs in the VMIO code
when dealing with a buf at a non-page-aligned blkno, e.g. reading one
page-size worth of data at block 1 from a block device. As Brian Feldman's
core dump shows, since the buf does not start at a page boundary, it should
span two pages, yet only one page is allocated in the current code, and a
subsequent write to the 2nd page results in a fault. I took a shot
at fixing this problem, which resulted in the patch below. Would any
knowledgeable person please take a look at the patch? I've found no ill
effects so far (I just finished a successful buildworld with a patched kernel).

Thanks
-lq

Index: vfs_bio.c
===================================================================
RCS file: /fun/cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.167
diff -u -r1.167 vfs_bio.c
--- vfs_bio.c	1998/07/13 07:05:55	1.167
+++ vfs_bio.c	1998/07/24 08:14:34
@@ -1325,6 +1325,7 @@
 		if (vm_page_is_valid(m,
 		    (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
 			return 0;
+		tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
 	}
 	return 1;
 }
@@ -1367,9 +1368,9 @@
 				break;
 			}
 		}
-		boffset = (i << PAGE_SHIFT);
+		boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
 		if (boffset < bp->b_dirtyoff) {
-			bp->b_dirtyoff = boffset;
+			bp->b_dirtyoff = max(boffset, 0);
 		}
 
 		/*
@@ -1381,11 +1382,14 @@
 			}
 		}
 		boffset = (i + 1);
+#if 0
 		offset = boffset + bp->b_pages[0]->pindex;
 		if (offset >= object->size)
 			boffset = object->size - bp->b_pages[0]->pindex;
-		if (bp->b_dirtyend < (boffset << PAGE_SHIFT))
-			bp->b_dirtyend = (boffset << PAGE_SHIFT);
+#endif
+		boffset = (boffset << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
+		if (bp->b_dirtyend < boffset)
+			bp->b_dirtyend = min(boffset, bp->b_bufsize);
 	}
 }
 
@@ -1398,21 +1402,9 @@
 	struct buf *bp;
 	int i, s;
 	struct bufhashhdr *bh;
-	int maxsize;
 	int generation;
 	int checksize;
 
-	if (vp->v_mount) {
-		maxsize = vp->v_mount->mnt_stat.f_iosize;
-		/*
-		 * This happens on mount points.
-		 */
-		if (maxsize < size)
-			maxsize = size;
-	} else {
-		maxsize = size;
-	}
-
 #if !defined(MAX_PERF)
 	if (size > MAXBSIZE)
 		panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE);
@@ -1503,7 +1495,22 @@
 		splx(s);
 		return (bp);
 	} else {
-		vm_object_t obj;
+		int bsize, maxsize, vmio;
+		off_t offset;
+
+		if (vp->v_type == VBLK)
+			bsize = DEV_BSIZE;
+		else if (vp->v_mountedhere)
+			bsize = vp->v_mountedhere->mnt_stat.f_iosize;
+		else if (vp->v_mount)
+			bsize = vp->v_mount->mnt_stat.f_iosize;
+		else
+			bsize = size;
+
+		offset = (off_t)blkno * bsize;
+		vmio = (vp->v_object != 0) && (vp->v_flag & VOBJBUF);
+		maxsize = vmio ? size + (offset & PAGE_MASK) : size;
+		maxsize = imax(maxsize, bsize);
 
 		if ((bp = getnewbuf(vp, blkno,
 			slpflag, slptimeo, size, maxsize)) == 0) {
@@ -1531,18 +1538,14 @@
 		 * be found by incore.
 		 */
 		bp->b_blkno = bp->b_lblkno = blkno;
-
-		if (vp->v_type != VBLK)
-			bp->b_offset = (off_t) blkno * maxsize;
-		else
-			bp->b_offset = (off_t) blkno * DEV_BSIZE;
+		bp->b_offset = offset;
 
 		bgetvp(vp, bp);
 		LIST_REMOVE(bp, b_hash);
 		bh = BUFHASH(vp, blkno);
 		LIST_INSERT_HEAD(bh, bp, b_hash);
 
-		if ((obj = vp->v_object) && (vp->v_flag & VOBJBUF)) {
+		if (vmio) {
 			bp->b_flags |= (B_VMIO | B_CACHE);
 #if defined(VFS_BIO_DEBUG)
 			if (vp->v_type != VREG && vp->v_type != VBLK)
@@ -1695,7 +1698,8 @@
 		int desiredpages;
 
 		newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
-		desiredpages = (round_page(newbsize) >> PAGE_SHIFT);
+		desiredpages = size == 0 ? 0 :
+			num_pages((bp->b_offset & PAGE_MASK) + newbsize);
 
 #if !defined(NO_B_MALLOC)
 		if (bp->b_flags & B_MALLOC)
@@ -1744,8 +1748,6 @@
 			if (bp->b_npages < desiredpages) {
 				obj = vp->v_object;
 				tinc = PAGE_SIZE;
-				if (tinc > bsize)
-					tinc = bsize;
 
 				off = bp->b_offset;
 #ifdef DIAGNOSTIC
@@ -1759,10 +1761,9 @@
 				bp->b_validend = orig_validend;
 				bp->b_flags |= B_CACHE;
 				for (toff = 0; toff < newbsize; toff += tinc) {
-					int bytesinpage;
-
-					pageindex = toff >> PAGE_SHIFT;
 					objoff = OFF_TO_IDX(off + toff);
+					pageindex = objoff - OFF_TO_IDX(off);
+					tinc = PAGE_SIZE - ((off + toff) & PAGE_MASK);
 					if (pageindex < curbpnpages) {
 
 						m = bp->b_pages[pageindex];
@@ -1770,11 +1771,10 @@
 						if (m->pindex != objoff)
 							panic("allocbuf: page changed offset??!!!?");
 #endif
-						bytesinpage = tinc;
 						if (tinc > (newbsize - toff))
-							bytesinpage = newbsize - toff;
+							tinc = newbsize - toff;
 						if (bp->b_flags & B_CACHE)
-							vfs_buf_set_valid(bp, off, toff, bytesinpage, m);
+							vfs_buf_set_valid(bp, off, toff, tinc, m);
 						continue;
 					}
 					m = vm_page_lookup(obj, objoff);
@@ -1782,7 +1782,7 @@
 						m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
 						if (!m) {
 							VM_WAIT;
-							vm_pageout_deficit += (desiredpages - bp->b_npages);
+							vm_pageout_deficit += (desiredpages - curbpnpages);
 							goto doretry;
 						}
 
@@ -1805,11 +1805,10 @@
 								(cnt.v_free_min + cnt.v_cache_min))) {
 							pagedaemon_wakeup();
 						}
-						bytesinpage = tinc;
 						if (tinc > (newbsize - toff))
-							bytesinpage = newbsize - toff;
+							tinc = newbsize - toff;
 						if (bp->b_flags & B_CACHE)
-							vfs_buf_set_valid(bp, off, toff, bytesinpage, m);
+							vfs_buf_set_valid(bp, off, toff, tinc, m);
 						m->flags &= ~PG_ZERO;
 						vm_page_wire(m);
 					}
@@ -2154,7 +2153,7 @@
 		 * This only bothers with the first valid range in the
 		 * page.
 		 */
-		svalid = off;
+		svalid = trunc_page(foff + off) - foff;
 		while (validbits && !(validbits & 1)) {
 			svalid += DEV_BSIZE;
 			validbits >>= 1;
@@ -2164,6 +2163,7 @@
 			evalid += DEV_BSIZE;
 			validbits >>= 1;
 		}
+		evalid = min(evalid, off + size);
 		/*
 		 * Make sure this range is contiguous with the range
 		 * built up from previous pages.  If not, then we will
@@ -2192,15 +2192,14 @@
 	vm_ooffset_t soff, eoff;
 
 	soff = off;
-	eoff = off + min(PAGE_SIZE, bp->b_bufsize);
+	eoff = min(off + PAGE_SIZE, bp->b_offset + bp->b_bufsize);
 	if (vp->v_tag == VT_NFS && vp->v_type != VBLK) {
 		vm_ooffset_t sv, ev;
 		vm_page_set_invalid(m,
 		    (vm_offset_t) (soff & PAGE_MASK),
 		    (vm_offset_t) (eoff - soff));
-		off = off - pageno * PAGE_SIZE;
-		sv = off + ((bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1));
-		ev = off + ((bp->b_validend + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1));
+		sv = (bp->b_offset + bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+		ev = (bp->b_offset + bp->b_validend) & ~(DEV_BSIZE - 1);
 		soff = qmax(sv, soff);
 		eoff = qmin(ev, eoff);
 	}
@@ -2285,18 +2284,21 @@
 			panic("vfs_clean_pages: no buffer offset");
 #endif
 
-		for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) {
+		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 			vfs_page_set_valid(bp, foff, i, m);
+			foff = trunc_page(foff + PAGE_SIZE);
 		}
 	}
 }
 
 void
 vfs_bio_clrbuf(struct buf *bp) {
-	int i;
+	int i, size;
+	caddr_t sa, ea;
 	if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
-		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) {
+		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
+		    (bp->b_offset & PAGE_MASK) == 0) {
 			int mask;
 			mask = 0;
 			for(i=0;i<bp->b_bufsize;i+=DEV_BSIZE)
@@ -2309,19 +2311,23 @@
 			bp->b_resid = 0;
 			return;
 		}
-		for(i=0;i<bp->b_npages;i++) {
+		ea = sa = bp->b_data;
+		for(i=0;i<bp->b_npages;i++,sa=ea) {
+			ea = (caddr_t)trunc_page((vm_offset_t)sa + PAGE_SIZE);
+			ea = (caddr_t)ulmin((u_long)ea,
+				(u_long)bp->b_data + bp->b_bufsize);
 			if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL)
 				continue;
 			if( bp->b_pages[i]->valid == 0) {
 				if ((bp->b_pages[i]->flags & PG_ZERO) == 0) {
-					bzero(bp->b_data + (i << PAGE_SHIFT), PAGE_SIZE);
+					bzero(sa, ea - sa);
 				}
 			} else {
 				int j;
-				for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) {
+				for(j=0;j<PAGE_SIZE/DEV_BSIZE&&sa<ea;j++,sa+=DEV_BSIZE) {
 					if (((bp->b_pages[i]->flags & PG_ZERO) == 0) &&
 						(bp->b_pages[i]->valid & (1<<j)) == 0)
-						bzero(bp->b_data + (i << PAGE_SHIFT) + j * DEV_BSIZE, DEV_BSIZE);
+						bzero(sa, DEV_BSIZE);
 				}
 			}
 			bp->b_pages[i]->valid = VM_PAGE_BITS_ALL;

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199807240945.FAA20438>