Date: Thu, 7 Jun 2012 17:42:39 GMT From: John Baldwin <jhb@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 212439 for review Message-ID: <201206071742.q57Hgdb0033633@skunkworks.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@212439?ac=10 Change 212439 by jhb@jhb_jhbbsd on 2012/06/07 17:42:28 Adjust noreuse writes to work the same as noreuse reads. Affected files ... .. //depot/projects/fadvise/sys/kern/vfs_vnops.c#8 edit Differences ... ==== //depot/projects/fadvise/sys/kern/vfs_vnops.c#8 (text+ko) ==== @@ -593,7 +593,7 @@ * access, track the previous implicit DONTNEED * request and grow this request to include the * current read(2) in addition to the previous - * DONTNEED. Withpurely sequential access this will + * DONTNEED. With purely sequential access this will * cause the DONTNEED requests to continously grow to * cover all of the previously read regions of the * file. This allows filesystem blocks that are @@ -636,6 +636,7 @@ int error, ioflag, lock_flags; struct mtx *mtxp; int advice, vfslocked; + off_t offset, start, end; KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); @@ -670,6 +671,7 @@ if ((flags & FOF_OFFSET) == 0) uio->uio_offset = fp->f_offset; advice = POSIX_FADV_NORMAL; + mtxp = NULL; if (fp->f_advice != NULL) { mtxp = mtx_pool_find(mtxpool_sleep, fp); mtx_lock(mtxp); @@ -682,19 +684,14 @@ switch (advice) { case POSIX_FADV_NORMAL: case POSIX_FADV_SEQUENTIAL: + case POSIX_FADV_NOREUSE: ioflag |= sequential_heuristic(uio, fp); break; case POSIX_FADV_RANDOM: /* XXX: Is this correct? */ break; - case POSIX_FADV_NOREUSE: - /* - * Request the underlying FS to discard the buffers - * and pages after the I/O is complete. - */ - ioflag |= IO_DIRECT; - break; } + offset = uio->uio_offset; #ifdef MAC error = mac_vnode_check_write(active_cred, fp->f_cred, vp); @@ -707,6 +704,55 @@ VOP_UNLOCK(vp, 0); if (vp->v_type != VCHR) vn_finished_write(mp); + if (error == 0 && advice == POSIX_FADV_NOREUSE && + offset != uio->uio_offset) { + /* + * Use POSIX_FADV_DONTNEED to flush clean pages and + * buffers for the backing file after a + * POSIX_FADV_NOREUSE write(2). To optimize the + * common case of using POSIX_FADV_NOREUSE with + * sequential access, track the previous implicit + * DONTNEED request and grow this request to include + * the current write(2) in addition to the previous + * DONTNEED. With purely sequential access this will + * cause the DONTNEED requests to continously grow to + * cover all of the previously written regions of the + * file. + * + * Note that the blocks just written are almost + * certainly still dirty, so this only works when + * VOP_ADVISE() calls from subsequent writes push out + * the data written by this write(2) once the backing + * buffers are clean. However, as compared to forcing + * IO_DIRECT, this gives much saner behavior. Write + * clustering is still allowed, and clean pages are + * merely moved to the cache page queue rather than + * outright thrown away. This means a subsequent + * read(2) can still avoid hitting the disk if the + * pages have not been reclaimed. + * + * This does make POSIX_FADV_NOREUSE largely useless + * with non-sequential access. However, sequential + * access is the more common use case and the flag is + * merely advisory. + */ + start = offset; + end = uio->uio_offset - 1; + mtx_lock(mtxp); + if (fp->f_advice != NULL && + fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) { + if (start != 0 && fp->f_advice->fa_prevend + 1 == start) + start = fp->f_advice->fa_prevstart; + else if (fp->f_advice->fa_prevstart != 0 && + fp->f_advice->fa_prevstart == end + 1) + end = fp->f_advice->fa_prevend; + fp->f_advice->fa_prevstart = start; + fp->f_advice->fa_prevend = end; + } + mtx_unlock(mtxp); + error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED); + } + unlock: VFS_UNLOCK_GIANT(vfslocked); return (error);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201206071742.q57Hgdb0033633>