From owner-svn-src-all@FreeBSD.ORG Wed Dec 14 22:04:15 2011 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 58361106564A; Wed, 14 Dec 2011 22:04:15 +0000 (UTC) (envelope-from pfg@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 3BCB88FC12; Wed, 14 Dec 2011 22:04:15 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id pBEM4FRt016571; Wed, 14 Dec 2011 22:04:15 GMT (envelope-from pfg@svn.freebsd.org) Received: (from pfg@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id pBEM4FfZ016569; Wed, 14 Dec 2011 22:04:15 GMT (envelope-from pfg@svn.freebsd.org) Message-Id: <201112142204.pBEM4FfZ016569@svn.freebsd.org> From: "Pedro F. Giffuni" Date: Wed, 14 Dec 2011 22:04:15 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r228507 - head/sys/fs/ext2fs X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 14 Dec 2011 22:04:15 -0000 Author: pfg Date: Wed Dec 14 22:04:14 2011 New Revision: 228507 URL: http://svn.freebsd.org/changeset/base/228507 Log: Merge ext2_readwrite.c into ext2_vnops.c as done in UFS in r101729. This removes the obfuscations mentioned in ext2_readwrite and places the clustering funtion in a location similar to other UFS-based implementations. No performance or functional changeses are expected from this move. PR: kern/159232 Suggested by: bde Approved by: jhb (mentor) MFC after: 2 weeks Deleted: head/sys/fs/ext2fs/ext2_readwrite.c Modified: head/sys/fs/ext2fs/ext2_vnops.c Modified: head/sys/fs/ext2fs/ext2_vnops.c ============================================================================== --- head/sys/fs/ext2fs/ext2_vnops.c Wed Dec 14 20:57:41 2011 (r228506) +++ head/sys/fs/ext2fs/ext2_vnops.c Wed Dec 14 22:04:14 2011 (r228507) @@ -64,9 +64,13 @@ #include #include +#include +#include #include #include +#include "opt_directio.h" + #include #include @@ -159,8 +163,6 @@ struct vop_vector ext2_fifoops = { .vop_vptofh = ext2_vptofh, }; -#include - /* * A virgin directory (no blushing please). * Note that the type and namlen fields are reversed relative to ext2. @@ -1675,3 +1677,328 @@ bad: vput(tvp); return (error); } + +/* + * Vnode op for reading. + */ +static int +ext2_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp; + struct inode *ip; + struct uio *uio; + struct m_ext2fs *fs; + struct buf *bp; + daddr_t lbn, nextlbn; + off_t bytesinfile; + long size, xfersize, blkoffset; + int error, orig_resid, seqcount; + int ioflag; + + vp = ap->a_vp; + uio = ap->a_uio; + ioflag = ap->a_ioflag; + + seqcount = ap->a_ioflag >> IO_SEQSHIFT; + ip = VTOI(vp); + +#ifdef INVARIANTS + if (uio->uio_rw != UIO_READ) + panic("%s: mode", "ext2_read"); + + if (vp->v_type == VLNK) { + if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) + panic("%s: short symlink", "ext2_read"); + } else if (vp->v_type != VREG && vp->v_type != VDIR) + panic("%s: type %d", "ext2_read", vp->v_type); +#endif + orig_resid = uio->uio_resid; + KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); + if (orig_resid == 0) + return (0); + KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); + fs = ip->i_e2fs; + if (uio->uio_offset < ip->i_size && + uio->uio_offset >= fs->e2fs_maxfilesize) + return (EOVERFLOW); + + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + nextlbn = lbn + 1; + size = blksize(fs, ip, lbn); + blkoffset = blkoff(fs, uio->uio_offset); + + xfersize = fs->e2fs_fsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + if (lblktosize(fs, nextlbn) >= ip->i_size) + error = bread(vp, lbn, size, NOCRED, &bp); + else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) + error = cluster_read(vp, ip->i_size, lbn, size, + NOCRED, blkoffset + uio->uio_resid, seqcount, &bp); + else if (seqcount > 1) { + int nextsize = blksize(fs, ip, nextlbn); + error = breadn(vp, lbn, + size, &nextlbn, &nextsize, 1, NOCRED, &bp); + } else + error = bread(vp, lbn, size, NOCRED, &bp); + if (error) { + brelse(bp); + bp = NULL; + break; + } + + /* + * If IO_DIRECT then set B_DIRECT for the buffer. This + * will cause us to attempt to release the buffer later on + * and will cause the buffer cache to attempt to free the + * underlying pages. + */ + if (ioflag & IO_DIRECT) + bp->b_flags |= B_DIRECT; + + /* + * We should only get non-zero b_resid when an I/O error + * has occurred, which should cause us to break above. + * However, if the short read did not cause an error, + * then we want to ensure that we do not uiomove bad + * or uninitialized data. + */ + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + error = uiomove((char *)bp->b_data + blkoffset, + (int)xfersize, uio); + if (error) + break; + + if (ioflag & (IO_VMIO|IO_DIRECT)) { + /* + * If it's VMIO or direct I/O, then we don't + * need the buf, mark it available for + * freeing. If it's non-direct VMIO, the VM has + * the data. + */ + bp->b_flags |= B_RELBUF; + brelse(bp); + } else { + /* + * Otherwise let whoever + * made the request take care of + * freeing it. We just queue + * it onto another list. + */ + bqrelse(bp); + } + } + + /* + * This can only happen in the case of an error + * because the loop above resets bp to NULL on each iteration + * and on normal completion has not set a new value into it. + * so it must have come from a 'break' statement + */ + if (bp != NULL) { + if (ioflag & (IO_VMIO|IO_DIRECT)) { + bp->b_flags |= B_RELBUF; + brelse(bp); + } else { + bqrelse(bp); + } + } + + if ((error == 0 || uio->uio_resid != orig_resid) && + (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) + ip->i_flag |= IN_ACCESS; + return (error); +} + +/* + * Vnode op for writing. + */ +static int +ext2_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp; + struct uio *uio; + struct inode *ip; + struct m_ext2fs *fs; + struct buf *bp; + daddr_t lbn; + off_t osize; + int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; + + ioflag = ap->a_ioflag; + uio = ap->a_uio; + vp = ap->a_vp; + + seqcount = ioflag >> IO_SEQSHIFT; + ip = VTOI(vp); + +#ifdef INVARIANTS + if (uio->uio_rw != UIO_WRITE) + panic("%s: mode", "ext2_write"); +#endif + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = ip->i_size; + if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) + return (EPERM); + /* FALLTHROUGH */ + case VLNK: + break; + case VDIR: + /* XXX differs from ffs -- this is called from ext2_mkdir(). */ + if ((ioflag & IO_SYNC) == 0) + panic("ext2_write: nonsync dir write"); + break; + default: + panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, + vp->v_type, (intmax_t)uio->uio_offset, + (intmax_t)uio->uio_resid); + } + + KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); + KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); + fs = ip->i_e2fs; + if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) + return (EFBIG); + /* + * Maybe this should be above the vnode op call, but so long as + * file servers have no limits, I don't think it matters. + */ + if (vn_rlimit_fsize(vp, uio, uio->uio_td)) + return (EFBIG); + + resid = uio->uio_resid; + osize = ip->i_size; + if (seqcount > BA_SEQMAX) + flags = BA_SEQMAX << BA_SEQSHIFT; + else + flags = seqcount << BA_SEQSHIFT; + if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) + flags |= IO_SYNC; + + for (error = 0; uio->uio_resid > 0;) { + lbn = lblkno(fs, uio->uio_offset); + blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->e2fs_fsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (uio->uio_offset + xfersize > ip->i_size) + vnode_pager_setsize(vp, uio->uio_offset + xfersize); + + /* + * We must perform a read-before-write if the transfer size + * does not cover the entire buffer. + */ + if (fs->e2fs_bsize > xfersize) + flags |= BA_CLRBUF; + else + flags &= ~BA_CLRBUF; + error = ext2_balloc(ip, lbn, blkoffset + xfersize, + ap->a_cred, &bp, flags); + if (error != 0) + break; + + /* + * If the buffer is not valid and we did not clear garbage + * out above, we have to do so here even though the write + * covers the entire buffer in order to avoid a mmap()/write + * race where another process may see the garbage prior to + * the uiomove() for a write replacing it. + */ + if ((bp->b_flags & B_CACHE) == 0 && fs->e2fs_bsize <= xfersize) + vfs_bio_clrbuf(bp); + if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) + bp->b_flags |= B_NOCACHE; + if (uio->uio_offset + xfersize > ip->i_size) + ip->i_size = uio->uio_offset + xfersize; + size = blksize(fs, ip, lbn) - bp->b_resid; + if (size < xfersize) + xfersize = size; + + error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); + if (ioflag & (IO_VMIO|IO_DIRECT)) { + bp->b_flags |= B_RELBUF; + } + + /* + * If IO_SYNC each buffer is written synchronously. Otherwise + * if we have a severe page deficiency write the buffer + * asynchronously. Otherwise try to cluster, and if that + * doesn't do it then either do an async write (if O_DIRECT), + * or a delayed write (if not). + */ + if (ioflag & IO_SYNC) { + (void)bwrite(bp); + } else if (vm_page_count_severe() || + buf_dirty_count_severe() || + (ioflag & IO_ASYNC)) { + bp->b_flags |= B_CLUSTEROK; + bawrite(bp); + } else if (xfersize + blkoffset == fs->e2fs_fsize) { + if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { + bp->b_flags |= B_CLUSTEROK; + cluster_write(vp, bp, ip->i_size, seqcount); + } else { + bawrite(bp); + } + } else if (ioflag & IO_DIRECT) { + bp->b_flags |= B_CLUSTEROK; + bawrite(bp); + } else { + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + if (error || xfersize == 0) + break; + } + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. + */ + if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && + ap->a_cred) { + if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) + ip->i_mode &= ~(ISUID | ISGID); + } + if (error) { + if (ioflag & IO_UNIT) { + (void)ext2_truncate(vp, osize, + ioflag & IO_SYNC, ap->a_cred, uio->uio_td); + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + } + } + if (uio->uio_resid != resid) { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (ioflag & IO_SYNC) + error = ext2_update(vp, 1); + } + return (error); +}