Date: Sat, 17 May 1997 16:52:46 +0100 (BST) From: Doug Rabson <dfr@nlsystems.com> To: Christoph Kukulies <kuku@gilberto.physik.rwth-aachen.de> Cc: dyson@freebsd.org, current@freebsd.org Subject: Re: can anyone confirm my NFS/VM problem? Message-ID: <Pine.BSF.3.95q.970517164819.5082B-100000@herring.nlsystems.com> In-Reply-To: <199705161047.MAA00774@gil.physik.rwth-aachen.de>
next in thread | previous in thread | raw e-mail | index | archive | help
On Fri, 16 May 1997, Christoph Kukulies wrote: > > I have created a test case to repeat that weird NFS/VM problem I > have here with a physics production program - the exposed bug is > really a showstopper for our production and I'm not sure if it is > not also in 2.2.1R. > (Don't know if Doug is already going to deal with this group > of NFS/VM/MMAP problems). Anyway, to assure it is really a > repeatable problem independent of some local peculiarity here > at my site, I'm seeking for someone with the following > configuration: Well I think I have a fix. It was bloody painful too. There were some extremely ugly problems associated with NFS' use of b_validoff and b_validend. I attempted to fix them but it still feels pretty fragile although it does pass Christoph's test case. John, could you have a look at these changes and tell me whether they make sense? Index: kern/vfs_bio.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_bio.c,v retrieving revision 1.115 diff -u -r1.115 vfs_bio.c --- vfs_bio.c 1997/05/10 09:09:42 1.115 +++ vfs_bio.c 1997/05/17 15:37:52 @@ -1215,6 +1215,10 @@ splx(s); allocbuf(bp, size); + if ((bp->b_flags & (B_VMIO|B_CACHE)) == (B_VMIO|B_CACHE)) { + bp->b_validoff = 0; + bp->b_validend = bp->b_bufsize; + } #ifdef PC98 /* * 1024byte/sector support @@ -1562,7 +1566,7 @@ } if (bp->b_flags & B_VMIO) { int i, resid; - vm_ooffset_t foff; + vm_ooffset_t foff, bfoff; vm_page_t m; vm_object_t obj; int iosize; @@ -1572,6 +1576,7 @@ foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + bfoff = foff; obj = vp->v_object; if (!obj) { panic("biodone: no object"); @@ -1613,8 +1618,18 @@ * here in the read case. 
*/ if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { - vm_page_set_validclean(m, - (vm_offset_t) (foff & PAGE_MASK), resid); + vm_ooffset_t soff, eoff; + if (vp->v_tag == VT_NFS) { + soff = max((bfoff + bp->b_validoff + DEV_BSIZE - 1) & -DEV_BSIZE, + foff); + eoff = min((bfoff + bp->b_validend) & -DEV_BSIZE, + foff + resid); + vm_page_set_invalid(m, 0, PAGE_SIZE); + if (eoff > soff) + vm_page_set_validclean(m, soff, eoff - soff); + } else + vm_page_set_validclean(m, + (vm_offset_t) (foff & PAGE_MASK), resid); } /* @@ -1774,14 +1789,16 @@ int i; if (bp->b_flags & B_VMIO) { - vm_object_t obj = bp->b_vp->v_object; - vm_ooffset_t foff; + struct vnode *vp = bp->b_vp; + vm_object_t obj = vp->v_object; + vm_ooffset_t foff, bfoff; int iocount = bp->b_bufsize; if (bp->b_vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else - foff = (vm_ooffset_t) bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + bfoff = foff; vfs_setdirty(bp); for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; @@ -1795,8 +1812,18 @@ } vm_page_protect(m, VM_PROT_NONE); if (clear_modify) { - vm_page_set_validclean(m, - (vm_offset_t) (foff & PAGE_MASK), resid); + vm_ooffset_t soff, eoff; + if (vp->v_tag == VT_NFS) { + soff = max((bfoff + bp->b_validoff + DEV_BSIZE - 1) & -DEV_BSIZE, + foff); + eoff = min((bfoff + bp->b_validend) & -DEV_BSIZE, + foff + resid); + vm_page_set_invalid(m, 0, PAGE_SIZE); + if (eoff > soff) + vm_page_set_validclean(m, soff, eoff - soff); + } else + vm_page_set_validclean(m, + (vm_offset_t) (foff & PAGE_MASK), resid); } else if (bp->b_bcount >= PAGE_SIZE) { if (m->valid && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; @@ -1820,13 +1847,15 @@ int i; if (bp->b_flags & B_VMIO) { - vm_ooffset_t foff; + vm_ooffset_t foff, bfoff; + struct vnode *vp = bp->b_vp; int iocount = bp->b_bufsize; if (bp->b_vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * 
bp->b_lblkno; else - foff = (vm_ooffset_t) bp->b_vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + bfoff = foff; for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; @@ -1835,8 +1864,18 @@ if (resid > iocount) resid = iocount; if (resid > 0) { - vm_page_set_validclean(m, - ((vm_offset_t) foff & PAGE_MASK), resid); + vm_ooffset_t soff, eoff; + if (vp->v_tag == VT_NFS) { + soff = max((bfoff + bp->b_validoff + DEV_BSIZE - 1) & -DEV_BSIZE, + foff); + eoff = min((bfoff + bp->b_validend) & -DEV_BSIZE, + foff + resid); + vm_page_set_invalid(m, 0, PAGE_SIZE); + if (eoff > soff) + vm_page_set_validclean(m, soff, eoff - soff); + } else + vm_page_set_validclean(m, + (vm_offset_t) (foff & PAGE_MASK), resid); } foff += resid; iocount -= resid; Index: vm/vm_fault.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vm_fault.c,v retrieving revision 1.68 diff -u -r1.68 vm_fault.c --- vm_fault.c 1997/04/06 16:16:11 1.68 +++ vm_fault.c 1997/05/17 15:12:15 @@ -326,7 +326,7 @@ m->flags |= PG_BUSY; - if (m->valid && + if (/*m->valid && */ ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) && m->object != kernel_object && m->object != kmem_object) { goto readrest; Index: vm/vnode_pager.c =================================================================== RCS file: /home/ncvs/src/sys/vm/vnode_pager.c,v retrieving revision 1.70 diff -u -r1.70 vnode_pager.c --- vnode_pager.c 1997/03/08 04:33:47 1.70 +++ vnode_pager.c 1997/05/17 15:28:25 @@ -692,7 +692,12 @@ /* * if ANY DEV_BSIZE blocks are valid on a large filesystem block * then, the entire page is valid -- + * XXX no it isn't */ + + if (m[reqpage]->valid != VM_PAGE_BITS_ALL) + m[reqpage]->valid = 0; + if (m[reqpage]->valid) { m[reqpage]->valid = VM_PAGE_BITS_ALL; for (i = 0; i < count; i++) { -- Doug Rabson Mail: dfr@nlsystems.com Nonlinear Systems Ltd. Phone: +44 181 951 1891
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?Pine.BSF.3.95q.970517164819.5082B-100000>