Date: Fri, 8 Jun 2012 17:51:48 GMT From: John Baldwin <jhb@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 212482 for review Message-ID: <201206081751.q58HpmJA025814@skunkworks.freebsd.org>
index | next in thread | raw e-mail
http://p4web.freebsd.org/@@212482?ac=10 Change 212482 by jhb@jhb_jhbbsd on 2012/06/08 17:51:43 - Revert the V_CLEANONLY change to vinvalbuf(); it was a bit too heavy-handed, as it forcibly freed all pages for any busy buffers whereas we just want to detach the buffer from the pages and then move the pages to cache. - Add a new vdetachbuf() that does just that (I think). Use this instead of vinvalbuf() in vop_stdadvise(). - Inline the needed parts including a more accurate vdetachbuf() call in ffs's version of DONTNEED (though I'm less convinced this is needed). Affected files ... .. //depot/projects/fadvise/sys/kern/vfs_default.c#8 edit .. //depot/projects/fadvise/sys/kern/vfs_subr.c#9 edit .. //depot/projects/fadvise/sys/sys/vnode.h#7 edit .. //depot/projects/fadvise/sys/ufs/ffs/ffs_vnops.c#6 edit Differences ... ==== //depot/projects/fadvise/sys/kern/vfs_default.c#8 (text+ko) ==== @@ -1005,8 +1005,9 @@ case POSIX_FADV_DONTNEED: /* * Flush any open FS buffers and then remove pages - * from the backing VM object. Using vinvalbuf() here - * is a bit heavy-handed as it flushes all buffers for + * from the backing VM object. Since the block size + * is FS-specific knowledge, the vdetachbuf() here is + * a bit heavy-handed as it flushes all buffers for * the given vnode, not just the buffers covering the * requested range. */ @@ -1018,7 +1019,7 @@ VFS_UNLOCK_GIANT(vfslocked); break; } - vinvalbuf(vp, V_CLEANONLY, 0, 0); + vdetachbuf(vp, 0, 0, 0, 0); if (vp->v_object != NULL) { start = trunc_page(ap->a_start); end = round_page(ap->a_end); ==== //depot/projects/fadvise/sys/kern/vfs_subr.c#9 (text+ko) ==== @@ -1191,7 +1191,7 @@ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); - if (error == 0 && !(flags & V_CLEANONLY)) + if (error == 0) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { @@ -1221,7 +1221,7 @@ * Destroy the copy in the VM cache, too. 
*/ if (bo->bo_object != NULL && - (flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0) { + (flags & (V_ALT | V_NORMAL)) == 0) { VM_OBJECT_LOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? OBJPR_CLEANONLY : 0); @@ -1230,7 +1230,7 @@ #ifdef INVARIANTS BO_LOCK(bo); - if ((flags & (V_ALT | V_NORMAL | V_CLEANONLY)) == 0 && + if ((flags & (V_ALT | V_NORMAL)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); BO_UNLOCK(bo); @@ -1253,7 +1253,6 @@ /* * Flush out buffers on the specified list. - * */ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, @@ -1329,6 +1328,67 @@ } /* + * Detach clean buffers for a range of blocks in a file from it's + * backing pages. This does not invalidate the buffers like + * flushbuflist(). If the range is specified as (0, 0), then all + * clean buffers are detached. + */ +int +vdetachbuf(struct vnode *vp, daddr_t start, daddr_t end, int slpflag, + int slptimeo) +{ + struct bufobj *bo; + struct buf *bp, *nbp; + int error; + daddr_t lblkno; + b_xflags_t xflags; + + bo = &vp->v_bufobj; + BO_LOCK(bo); +restart: + TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { + if ((start != 0 || end != 0) && (bp->b_lblkno < start || + bp->b_lblkno > end)) + continue; + lblkno = 0; + xflags = 0; + if (nbp != NULL) { + lblkno = nbp->b_lblkno; + xflags = nbp->b_xflags & + (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN); + } + error = BUF_TIMELOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo), + "detachbuf", slpflag, slptimeo); + if (error != ENOLCK) + return (error); + if (error == EAGAIN) { + BO_LOCK(bo); + goto restart; + } + KASSERT(bp->b_bufobj == bo, + ("bp %p wrong b_bufobj %p should be %p", + bp, bp->b_bufobj, bo)); + KASSERT(!(bp->b_flags & B_DELWRI), + ("delayed write on bo_clean")); + BO_LOCK(bo); + bremfree(bp); + BO_UNLOCK(bo); + bp->b_flags &= ~B_ASYNC; + brelse(bp); + BO_LOCK(bo); + if (nbp != NULL && + (nbp->b_bufobj 
!= bo || + nbp->b_lblkno != lblkno || + (nbp->b_xflags & + (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN)) != xflags)) + goto restart; /* nbp invalid */ + } + BO_UNLOCK(bo); + return (0); +} + +/* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. ==== //depot/projects/fadvise/sys/sys/vnode.h#7 (text+ko) ==== @@ -384,7 +384,6 @@ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */ #define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */ -#define V_CLEANONLY 0x0008 /* vinvalbuf: invalidate only clean bufs */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */ #define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */ @@ -624,6 +623,8 @@ struct ucred *cred, int *privused); void vattr_null(struct vattr *vap); int vcount(struct vnode *vp); +int vdetachbuf(struct vnode *vp, daddr_t start, daddr_t end, int slpflag, + int slptimeo); void vdrop(struct vnode *); void vdropl(struct vnode *); void vdestroy(struct vnode *); ==== //depot/projects/fadvise/sys/ufs/ffs/ffs_vnops.c#6 (text+ko) ==== @@ -430,7 +430,7 @@ vp = ap->a_vp; start = ap->a_start; end = ap->a_end; - vn_lock(vp, LK_SHARED | LK_RETRY); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); return (EBADF); @@ -457,12 +457,21 @@ if (ap->a_start != start || ap->a_end != end) printf("ffs_advise(DONTNEED): (%#lx-%#lx) -> (%#lx-%#lx)", ap->a_start, ap->a_end, start, end); + if (start > end) { + VOP_UNLOCK(vp, 0); + return (0); + } + vdetachbuf(vp, lblkno(fs, start), lblkno(fs, end), 0, 0); + if (vp->v_object != NULL) { + start = trunc_page(start); + end = round_page(end); + VM_OBJECT_LOCK(vp->v_object); + vm_object_page_cache(vp->v_object, OFF_TO_IDX(start), + OFF_TO_IDX(end)); + VM_OBJECT_UNLOCK(vp->v_object); + } 
VOP_UNLOCK(vp, 0); - if (start > end) - return (0); - ap->a_start = start; - ap->a_end = end; - return (vop_stdadvise(ap)); + return (0); case POSIX_FADV_WILLNEED: vp = ap->a_vp; start = ap->a_start;
help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201206081751.q58HpmJA025814>
