From owner-freebsd-current Thu May 29 08:25:26 1997 Return-Path: Received: (from root@localhost) by hub.freebsd.org (8.8.5/8.8.5) id IAA19665 for current-outgoing; Thu, 29 May 1997 08:25:26 -0700 (PDT) Received: from nlsystems.com (nlsys.demon.co.uk [158.152.125.33]) by hub.freebsd.org (8.8.5/8.8.5) with ESMTP id IAA19660 for ; Thu, 29 May 1997 08:25:21 -0700 (PDT) Received: from herring.nlsystems.com (herring.nlsystems.com [10.0.0.2]) by nlsystems.com (8.8.5/8.8.5) with SMTP id QAA00543; Thu, 29 May 1997 16:25:07 +0100 (BST) Date: Thu, 29 May 1997 16:25:07 +0100 (BST) From: Doug Rabson To: Bruce Evans cc: current@freebsd.org Subject: Re: disk cache challenged by small block sizes In-Reply-To: <199705291228.WAA11504@godzilla.zeta.org.au> Message-ID: MIME-Version: 1.0 Content-Type: TEXT/PLAIN; charset=US-ASCII Sender: owner-current@freebsd.org X-Loop: FreeBSD.org Precedence: bulk On Thu, 29 May 1997, Bruce Evans wrote: > Iozone on an ext2fs file system with the default block size of 1K on an > (ncr) SCSI Zip disk gave the following poor results: > > [snip] > > This is probably related to slow reads from cd9660. (For cd9660, reads > apparently aren't cached, no matter what the block size is, and reads > with a block size of 512 are apparently repeated 4 times for each 2K > fs block. For ext2fs, the above shows that writes are sometimes cached > but another test shows that rereading flushes the cache.) This is caused by my recent changes to vfs_bio to make NFS mmap work properly. I didn't test on a filesystem with small block sizes. This patch should fix it: Index: vfs_bio.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_bio.c,v retrieving revision 1.116 diff -u -r1.116 vfs_bio.c --- vfs_bio.c 1997/05/19 14:36:36 1.116 +++ vfs_bio.c 1997/05/29 15:22:39 @@ -80,7 +80,8 @@ static void vfs_buf_set_valid(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m); -static void vfs_page_set_valid(struct buf *bp, vm_offset_t off, vm_page_t m); +static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, + int pageno, vm_page_t m); static void vfs_clean_pages(struct buf * bp); static void vfs_setdirty(struct buf *bp); static void vfs_vmio_release(struct buf *bp); @@ -1572,7 +1573,7 @@ } if (bp->b_flags & B_VMIO) { int i, resid; - vm_ooffset_t foff, bfoff; + vm_ooffset_t foff; vm_page_t m; vm_object_t obj; int iosize; @@ -1582,7 +1583,6 @@ foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; - bfoff = foff; obj = vp->v_object; if (!obj) { panic("biodone: no object"); @@ -1624,7 +1624,7 @@ * here in the read case. */ if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { - vfs_page_set_valid(bp, foff - bfoff, m); + vfs_page_set_valid(bp, foff, i, m); } /* @@ -1818,21 +1818,25 @@ /* * Set the valid bits in a page, taking care of the b_validoff, * b_validend fields which NFS uses to optimise small reads. Off is - * the offset of the page within the buf. + * the offset within the file and pageno is the page index within the buf. */ static void -vfs_page_set_valid(struct buf *bp, vm_offset_t off, vm_page_t m) +vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m) { struct vnode *vp = bp->b_vp; vm_offset_t soff, eoff; soff = off; - eoff = min(off + PAGE_SIZE, bp->b_bufsize); + eoff = off + min(PAGE_SIZE, bp->b_bufsize); + vm_page_set_invalid(m, soff, eoff); if (vp->v_tag == VT_NFS) { - soff = max((bp->b_validoff + DEV_BSIZE - 1) & -DEV_BSIZE, soff); - eoff = min(bp->b_validend & -DEV_BSIZE, eoff); + vm_offset_t sv, ev; + sv = off + (bp->b_validoff + DEV_BSIZE - 1) & -DEV_BSIZE + - pageno * PAGE_SIZE; + ev = off + bp->b_validend & -DEV_BSIZE - pageno * PAGE_SIZE; + soff = max(sv, soff); + eoff = min(ev, eoff); } - vm_page_set_invalid(m, 0, PAGE_SIZE); if (eoff > soff) vm_page_set_validclean(m, soff, eoff - soff); } @@ -1851,11 +1855,16 @@ int i; if (bp->b_flags & B_VMIO) { - vm_object_t obj = bp->b_vp->v_object; - vm_offset_t off; + struct vnode *vp = bp->b_vp; + vm_object_t obj = vp->v_object; + vm_ooffset_t foff; + if (vp->v_type == VBLK) + foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; + else + foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; vfs_setdirty(bp); - for (i = 0, off = 0; i < bp->b_npages; i++, off += PAGE_SIZE) { + for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) { vm_page_t m = bp->b_pages[i]; if ((bp->b_flags & B_CLUSTER) == 0) { @@ -1864,7 +1873,7 @@ } vm_page_protect(m, VM_PROT_NONE); if (clear_modify) - vfs_page_set_valid(bp, off, m); + vfs_page_set_valid(bp, foff, i, m); else if (bp->b_bcount >= PAGE_SIZE) { if (m->valid && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; @@ -1886,12 +1895,18 @@ int i; if (bp->b_flags & B_VMIO) { - vm_offset_t off; + struct vnode *vp = bp->b_vp; + vm_object_t obj = vp->v_object; + vm_ooffset_t foff; - for (i = 0, off = 0; i < bp->b_npages; i++, off += PAGE_SIZE) { + if (vp->v_type == VBLK) + foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; + else + foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; + for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) { vm_page_t m = bp->b_pages[i]; - vfs_page_set_valid(bp, off, m); + vfs_page_set_valid(bp, foff, i, m); } } } -- Doug Rabson Mail: dfr@nlsystems.com Nonlinear Systems Ltd. Phone: +44 181 951 1891 Fax: +44 181 381 1039