Date: Fri, 24 Jul 1998 05:45:36 -0400 (EDT)
From: Luoqi Chen <luoqi@watermarkgroup.com>
To: green@zone.baldcom.net, jkh@time.cdrom.com
Cc: bright@hotjobs.com, freebsd-current@FreeBSD.ORG, joelh@gnu.org
Subject: Re: vn subsystem
Message-ID: <199807240945.FAA20438@lor.watermarkgroup.com>
> > mount /dev/vn0 /msdos (oops, should this matter anyway?)
> > mount_msdos /dev/vn0 /msdos
>
> Try it without the bad mount first. This sounds an awful lot like
> another PR which has nothing to do with vn (e.g. it would do the *same*
> thing if you tried to do this with a non-vn device).
>
> - Jordan
>
I took a look at this problem and found some bugs in the VMIO code when
dealing with a buf at a non-page-aligned blkno, e.g. reading one page of
data at block 1 of a block device, as Brian Feldman's core dump shows.
Since such a buf does not start at a page boundary, it should span two
pages, yet only one page is allocated by the current code, and a
subsequent write to the second page results in a fault. I took a shot at
fixing this, which resulted in the patch below. Would a knowledgeable
person please take a look at it? I've seen no ill effects so far
(I just finished a successful buildworld with a patched kernel).
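As an illustrative aside (not part of the patch itself): a minimal userland
sketch of the page arithmetic, assuming 4K pages and a 512-byte DEV_BSIZE.
pages_needed() is a hypothetical stand-in for the kernel's num_pages()
calculation; the point is that a buf starting at device offset 512 needs
two backing pages, not the one that round_page(size) >> PAGE_SHIFT gives.

/* Sketch only: why a non-page-aligned buf spans an extra page. */
#include <stdio.h>

#define DEV_BSIZE   512
#define PAGE_SIZE   4096
#define PAGE_MASK   (PAGE_SIZE - 1)

/* Pages needed to back 'size' bytes starting at byte offset 'offset'. */
static int
pages_needed(long long offset, int size)
{
        long long first = offset & ~(long long)PAGE_MASK;           /* page containing the start */
        long long last  = (offset + size + PAGE_MASK) & ~(long long)PAGE_MASK; /* page past the end */
        return (int)((last - first) / PAGE_SIZE);
}

int
main(void)
{
        /* One page of data at block 1 of a block device: offset 512, size 4096. */
        long long offset = 1 * DEV_BSIZE;
        int size = PAGE_SIZE;

        /* Old calculation: round_page(size) >> PAGE_SHIFT == 1 page. */
        int old_pages = (size + PAGE_MASK) / PAGE_SIZE;
        /* New calculation: num_pages((offset & PAGE_MASK) + size) == 2 pages. */
        int new_pages = pages_needed(offset, size);

        printf("old: %d page(s), new: %d page(s)\n", old_pages, new_pages);
        return 0;
}

This mirrors the allocbuf() change in the patch, where desiredpages is now
computed from (bp->b_offset & PAGE_MASK) + newbsize instead of newbsize alone.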
Thanks
-lq
Index: vfs_bio.c
===================================================================
RCS file: /fun/cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.167
diff -u -r1.167 vfs_bio.c
--- vfs_bio.c 1998/07/13 07:05:55 1.167
+++ vfs_bio.c 1998/07/24 08:14:34
@@ -1325,6 +1325,7 @@
if (vm_page_is_valid(m,
(vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
return 0;
+ tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
}
return 1;
}
@@ -1367,9 +1368,9 @@
break;
}
}
- boffset = (i << PAGE_SHIFT);
+ boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
if (boffset < bp->b_dirtyoff) {
- bp->b_dirtyoff = boffset;
+ bp->b_dirtyoff = max(boffset, 0);
}
/*
@@ -1381,11 +1382,14 @@
}
}
boffset = (i + 1);
+#if 0
offset = boffset + bp->b_pages[0]->pindex;
if (offset >= object->size)
boffset = object->size - bp->b_pages[0]->pindex;
- if (bp->b_dirtyend < (boffset << PAGE_SHIFT))
- bp->b_dirtyend = (boffset << PAGE_SHIFT);
+#endif
+ boffset = (boffset << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
+ if (bp->b_dirtyend < boffset)
+ bp->b_dirtyend = min(boffset, bp->b_bufsize);
}
}
@@ -1398,21 +1402,9 @@
struct buf *bp;
int i, s;
struct bufhashhdr *bh;
- int maxsize;
int generation;
int checksize;
- if (vp->v_mount) {
- maxsize = vp->v_mount->mnt_stat.f_iosize;
- /*
- * This happens on mount points.
- */
- if (maxsize < size)
- maxsize = size;
- } else {
- maxsize = size;
- }
-
#if !defined(MAX_PERF)
if (size > MAXBSIZE)
panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE);
@@ -1503,7 +1495,22 @@
splx(s);
return (bp);
} else {
- vm_object_t obj;
+ int bsize, maxsize, vmio;
+ off_t offset;
+
+ if (vp->v_type == VBLK)
+ bsize = DEV_BSIZE;
+ else if (vp->v_mountedhere)
+ bsize = vp->v_mountedhere->mnt_stat.f_iosize;
+ else if (vp->v_mount)
+ bsize = vp->v_mount->mnt_stat.f_iosize;
+ else
+ bsize = size;
+
+ offset = (off_t)blkno * bsize;
+ vmio = (vp->v_object != 0) && (vp->v_flag & VOBJBUF);
+ maxsize = vmio ? size + (offset & PAGE_MASK) : size;
+ maxsize = imax(maxsize, bsize);
if ((bp = getnewbuf(vp, blkno,
slpflag, slptimeo, size, maxsize)) == 0) {
@@ -1531,18 +1538,14 @@
* be found by incore.
*/
bp->b_blkno = bp->b_lblkno = blkno;
-
- if (vp->v_type != VBLK)
- bp->b_offset = (off_t) blkno * maxsize;
- else
- bp->b_offset = (off_t) blkno * DEV_BSIZE;
+ bp->b_offset = offset;
bgetvp(vp, bp);
LIST_REMOVE(bp, b_hash);
bh = BUFHASH(vp, blkno);
LIST_INSERT_HEAD(bh, bp, b_hash);
- if ((obj = vp->v_object) && (vp->v_flag & VOBJBUF)) {
+ if (vmio) {
bp->b_flags |= (B_VMIO | B_CACHE);
#if defined(VFS_BIO_DEBUG)
if (vp->v_type != VREG && vp->v_type != VBLK)
@@ -1695,7 +1698,8 @@
int desiredpages;
newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
- desiredpages = (round_page(newbsize) >> PAGE_SHIFT);
+ desiredpages = size == 0 ? 0 :
+ num_pages((bp->b_offset & PAGE_MASK) + newbsize);
#if !defined(NO_B_MALLOC)
if (bp->b_flags & B_MALLOC)
@@ -1744,8 +1748,6 @@
if (bp->b_npages < desiredpages) {
obj = vp->v_object;
tinc = PAGE_SIZE;
- if (tinc > bsize)
- tinc = bsize;
off = bp->b_offset;
#ifdef DIAGNOSTIC
@@ -1759,10 +1761,9 @@
bp->b_validend = orig_validend;
bp->b_flags |= B_CACHE;
for (toff = 0; toff < newbsize; toff += tinc) {
- int bytesinpage;
-
- pageindex = toff >> PAGE_SHIFT;
objoff = OFF_TO_IDX(off + toff);
+ pageindex = objoff - OFF_TO_IDX(off);
+ tinc = PAGE_SIZE - ((off + toff) & PAGE_MASK);
if (pageindex < curbpnpages) {
m = bp->b_pages[pageindex];
@@ -1770,11 +1771,10 @@
if (m->pindex != objoff)
panic("allocbuf: page changed offset??!!!?");
#endif
- bytesinpage = tinc;
if (tinc > (newbsize - toff))
- bytesinpage = newbsize - toff;
+ tinc = newbsize - toff;
if (bp->b_flags & B_CACHE)
- vfs_buf_set_valid(bp, off, toff, bytesinpage, m);
+ vfs_buf_set_valid(bp, off, toff, tinc, m);
continue;
}
m = vm_page_lookup(obj, objoff);
@@ -1782,7 +1782,7 @@
m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL);
if (!m) {
VM_WAIT;
- vm_pageout_deficit += (desiredpages - bp->b_npages);
+ vm_pageout_deficit += (desiredpages - curbpnpages);
goto doretry;
}
@@ -1805,11 +1805,10 @@
(cnt.v_free_min + cnt.v_cache_min))) {
pagedaemon_wakeup();
}
- bytesinpage = tinc;
if (tinc > (newbsize - toff))
- bytesinpage = newbsize - toff;
+ tinc = newbsize - toff;
if (bp->b_flags & B_CACHE)
- vfs_buf_set_valid(bp, off, toff, bytesinpage, m);
+ vfs_buf_set_valid(bp, off, toff, tinc, m);
m->flags &= ~PG_ZERO;
vm_page_wire(m);
}
@@ -2154,7 +2153,7 @@
* This only bothers with the first valid range in the
* page.
*/
- svalid = off;
+ svalid = trunc_page(foff + off) - foff;
while (validbits && !(validbits & 1)) {
svalid += DEV_BSIZE;
validbits >>= 1;
@@ -2164,6 +2163,7 @@
evalid += DEV_BSIZE;
validbits >>= 1;
}
+ evalid = min(evalid, off + size);
/*
* Make sure this range is contiguous with the range
* built up from previous pages. If not, then we will
@@ -2192,15 +2192,14 @@
vm_ooffset_t soff, eoff;
soff = off;
- eoff = off + min(PAGE_SIZE, bp->b_bufsize);
+ eoff = min(off + PAGE_SIZE, bp->b_offset + bp->b_bufsize);
if (vp->v_tag == VT_NFS && vp->v_type != VBLK) {
vm_ooffset_t sv, ev;
vm_page_set_invalid(m,
(vm_offset_t) (soff & PAGE_MASK),
(vm_offset_t) (eoff - soff));
- off = off - pageno * PAGE_SIZE;
- sv = off + ((bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1));
- ev = off + ((bp->b_validend + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1));
+ sv = (bp->b_offset + bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
+ ev = (bp->b_offset + bp->b_validend) & ~(DEV_BSIZE - 1);
soff = qmax(sv, soff);
eoff = qmin(ev, eoff);
}
@@ -2285,18 +2284,21 @@
panic("vfs_clean_pages: no buffer offset");
#endif
- for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) {
+ for (i = 0; i < bp->b_npages; i++) {
vm_page_t m = bp->b_pages[i];
vfs_page_set_valid(bp, foff, i, m);
+ foff = trunc_page(foff + PAGE_SIZE);
}
}
}
void
vfs_bio_clrbuf(struct buf *bp) {
- int i;
+ int i, size;
+ caddr_t sa, ea;
if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
- if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) {
+ if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
+ (bp->b_offset & PAGE_MASK) == 0) {
int mask;
mask = 0;
for(i=0;i<bp->b_bufsize;i+=DEV_BSIZE)
@@ -2309,19 +2311,23 @@
bp->b_resid = 0;
return;
}
- for(i=0;i<bp->b_npages;i++) {
+ ea = sa = bp->b_data;
+ for(i=0;i<bp->b_npages;i++,sa=ea) {
+ ea = (caddr_t)trunc_page((vm_offset_t)sa + PAGE_SIZE);
+ ea = (caddr_t)ulmin((u_long)ea,
+ (u_long)bp->b_data + bp->b_bufsize);
if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL)
continue;
if( bp->b_pages[i]->valid == 0) {
if ((bp->b_pages[i]->flags & PG_ZERO) == 0) {
- bzero(bp->b_data + (i << PAGE_SHIFT), PAGE_SIZE);
+ bzero(sa, ea - sa);
}
} else {
int j;
- for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) {
+ for(j=0;j<PAGE_SIZE/DEV_BSIZE&&sa<ea;j++,sa+=DEV_BSIZE) {
if (((bp->b_pages[i]->flags & PG_ZERO) == 0) &&
(bp->b_pages[i]->valid & (1<<j)) == 0)
- bzero(bp->b_data + (i << PAGE_SHIFT) + j * DEV_BSIZE, DEV_BSIZE);
+ bzero(sa, DEV_BSIZE);
}
}
bp->b_pages[i]->valid = VM_PAGE_BITS_ALL;
