From owner-svn-src-projects@FreeBSD.ORG Fri Apr 23 09:09:39 2010 Return-Path: Delivered-To: svn-src-projects@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id B8C4A106564A; Fri, 23 Apr 2010 09:09:39 +0000 (UTC) (envelope-from jeff@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id A674C8FC19; Fri, 23 Apr 2010 09:09:39 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o3N99dfY053313; Fri, 23 Apr 2010 09:09:39 GMT (envelope-from jeff@svn.freebsd.org) Received: (from jeff@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o3N99dVw053307; Fri, 23 Apr 2010 09:09:39 GMT (envelope-from jeff@svn.freebsd.org) Message-Id: <201004230909.o3N99dVw053307@svn.freebsd.org> From: Jeff Roberson Date: Fri, 23 Apr 2010 09:09:39 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org X-SVN-Group: projects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r207106 - in projects/suj/head/sys: sys ufs/ffs X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 23 Apr 2010 09:09:39 -0000 Author: jeff Date: Fri Apr 23 09:09:39 2010 New Revision: 207106 URL: http://svn.freebsd.org/changeset/base/207106 Log: Fix snapshots on SUJ-protected filesystems. - Expunge the journal file from the snapshot filesystem. This leaves it present as a zero-byte file. - Unfortunately, expunging is insufficient to prevent a deadlock between the snaplk and jwait. Implement a B_NOCOPY flag that causes a buf write to skip ffs_copyonwrite altogether. 
- When removing a snapshot, sync the rewritten indirect blocks before calling truncate. This is a long-standing snapshot bug that applies to any softdep-protected ffs filesystem. The dirtied indirect blocks have no dependencies, so softdep_setup_freeblocks will discard their contents and use the physical disk block addresses in indir_trunc() when removing the file. This can lead to indir_trunc discovering block pointers with BLK_NOCOPY and BLK_SNAP values. Modified: projects/suj/head/sys/sys/buf.h projects/suj/head/sys/ufs/ffs/ffs_extern.h projects/suj/head/sys/ufs/ffs/ffs_snapshot.c projects/suj/head/sys/ufs/ffs/ffs_softdep.c projects/suj/head/sys/ufs/ffs/ffs_vfsops.c Modified: projects/suj/head/sys/sys/buf.h ============================================================================== --- projects/suj/head/sys/sys/buf.h Fri Apr 23 08:49:38 2010 (r207105) +++ projects/suj/head/sys/sys/buf.h Fri Apr 23 09:09:39 2010 (r207106) @@ -215,7 +215,7 @@ struct buf { #define B_DIRTY 0x00200000 /* Needs writing later (in EXT2FS). */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_00800000 0x00800000 /* Available flag. */ -#define B_01000000 0x01000000 /* Available flag. */ +#define B_NOCOPY 0x01000000 /* Don't copy-on-write this buf. */ #define B_NEEDSGIANT 0x02000000 /* Buffer's vnode needs giant. */ #define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */ #define B_MANAGED 0x08000000 /* Managed by FS. 
*/ Modified: projects/suj/head/sys/ufs/ffs/ffs_extern.h ============================================================================== --- projects/suj/head/sys/ufs/ffs/ffs_extern.h Fri Apr 23 08:49:38 2010 (r207105) +++ projects/suj/head/sys/ufs/ffs/ffs_extern.h Fri Apr 23 09:09:39 2010 (r207106) @@ -145,6 +145,8 @@ int softdep_process_worklist(struct int softdep_fsync(struct vnode *); int softdep_waitidle(struct mount *); int softdep_prealloc(struct vnode *, int); +int softdep_journal_lookup(struct mount *, struct vnode **); + int ffs_rdonly(struct inode *); Modified: projects/suj/head/sys/ufs/ffs/ffs_snapshot.c ============================================================================== --- projects/suj/head/sys/ufs/ffs/ffs_snapshot.c Fri Apr 23 08:49:38 2010 (r207105) +++ projects/suj/head/sys/ufs/ffs/ffs_snapshot.c Fri Apr 23 09:09:39 2010 (r207106) @@ -142,7 +142,7 @@ MTX_SYSINIT(ffs_snapfree, &snapfree_lock static int cgaccount(int, struct vnode *, struct buf *, int); static int expunge_ufs1(struct vnode *, struct inode *, struct fs *, int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, - ufs_lbn_t, int), int); + ufs_lbn_t, int), int, int); static int indiracct_ufs1(struct vnode *, struct vnode *, int, ufs1_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, int (*)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, @@ -155,7 +155,7 @@ static int mapacct_ufs1(struct vnode *, struct fs *, ufs_lbn_t, int); static int expunge_ufs2(struct vnode *, struct inode *, struct fs *, int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, - ufs_lbn_t, int), int); + ufs_lbn_t, int), int, int); static int indiracct_ufs2(struct vnode *, struct vnode *, int, ufs2_daddr_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, ufs_lbn_t, struct fs *, int (*)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, @@ -591,10 +591,10 @@ loop: snaplistsize += 1; if (xp->i_ump->um_fstype == UFS1) error = expunge_ufs1(vp, xp, copy_fs, 
fullacct_ufs1, - BLK_NOCOPY); + BLK_NOCOPY, 1); else error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, - BLK_NOCOPY); + BLK_NOCOPY, 1); if (blkno) DIP_SET(xp, i_db[loc], blkno); if (!error) @@ -613,6 +613,26 @@ loop: } MNT_IUNLOCK(mp); /* + * Erase the journal file from the snapshot. + */ + if (fs->fs_flags & FS_SUJ) { + error = softdep_journal_lookup(mp, &xvp); + if (error) { + free(copy_fs->fs_csp, M_UFSMNT); + bawrite(sbp); + sbp = NULL; + goto out1; + } + xp = VTOI(xvp); + if (xp->i_ump->um_fstype == UFS1) + error = expunge_ufs1(vp, xp, copy_fs, fullacct_ufs1, + BLK_NOCOPY, 0); + else + error = expunge_ufs2(vp, xp, copy_fs, fullacct_ufs2, + BLK_NOCOPY, 0); + vput(xvp); + } + /* * Acquire a lock on the snapdata structure, creating it if necessary. */ sn = ffs_snapdata_acquire(devvp); @@ -692,10 +712,10 @@ out1: break; if (xp->i_ump->um_fstype == UFS1) error = expunge_ufs1(vp, xp, fs, snapacct_ufs1, - BLK_SNAP); + BLK_SNAP, 0); else error = expunge_ufs2(vp, xp, fs, snapacct_ufs2, - BLK_SNAP); + BLK_SNAP, 0); if (error == 0 && xp->i_effnlink == 0) { error = ffs_freefile(ump, copy_fs, @@ -720,9 +740,11 @@ out1: * the list of allocated blocks in i_snapblklist. */ if (ip->i_ump->um_fstype == UFS1) - error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, BLK_SNAP); + error = expunge_ufs1(vp, ip, copy_fs, mapacct_ufs1, + BLK_SNAP, 0); else - error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, BLK_SNAP); + error = expunge_ufs2(vp, ip, copy_fs, mapacct_ufs2, + BLK_SNAP, 0); if (error) { fs->fs_snapinum[snaploc] = 0; free(snapblklist, M_UFSMNT); @@ -955,13 +977,14 @@ cgaccount(cg, vp, nbp, passno) * is reproduced once each for UFS1 and UFS2. 
*/ static int -expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype) +expunge_ufs1(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) struct vnode *snapvp; struct inode *cancelip; struct fs *fs; int (*acctfunc)(struct vnode *, ufs1_daddr_t *, ufs1_daddr_t *, struct fs *, ufs_lbn_t, int); int expungetype; + int clearmode; { int i, error, indiroff; ufs_lbn_t lbn, rlbn; @@ -1006,7 +1029,7 @@ expunge_ufs1(snapvp, cancelip, fs, acctf */ dip = (struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, cancelip->i_number); - if (expungetype == BLK_NOCOPY || cancelip->i_effnlink == 0) + if (clearmode || cancelip->i_effnlink == 0) dip->di_mode = 0; dip->di_size = 0; dip->di_blocks = 0; @@ -1235,13 +1258,14 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, * is reproduced once each for UFS1 and UFS2. */ static int -expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype) +expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode) struct vnode *snapvp; struct inode *cancelip; struct fs *fs; int (*acctfunc)(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *, struct fs *, ufs_lbn_t, int); int expungetype; + int clearmode; { int i, error, indiroff; ufs_lbn_t lbn, rlbn; @@ -1286,7 +1310,7 @@ expunge_ufs2(snapvp, cancelip, fs, acctf */ dip = (struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, cancelip->i_number); - if (expungetype == BLK_NOCOPY) + if (clearmode || cancelip->i_effnlink == 0) dip->di_mode = 0; dip->di_size = 0; dip->di_blocks = 0; @@ -1658,6 +1682,13 @@ ffs_snapremove(vp) ip->i_flags &= ~SF_SNAPSHOT; DIP_SET(ip, i_flags, ip->i_flags); ip->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * The dirtied indirects must be written out before + * softdep_setup_freeblocks() is called. Otherwise indir_trunc() + * may find indirect pointers using the magic BLK_* values. + */ + if (DOINGSOFTDEP(vp)) + ffs_syncvnode(vp, MNT_WAIT); #ifdef QUOTA /* * Reenable disk quotas for ex-snapshot file. 
Modified: projects/suj/head/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- projects/suj/head/sys/ufs/ffs/ffs_softdep.c Fri Apr 23 08:49:38 2010 (r207105) +++ projects/suj/head/sys/ufs/ffs/ffs_softdep.c Fri Apr 23 09:09:39 2010 (r207106) @@ -2052,27 +2052,16 @@ jblocks_add(jblocks, daddr, blocks) return; } -/* - * Open and verify the journal file. - */ -static int -journal_mount(mp, fs, cred) +int +softdep_journal_lookup(mp, vpp) struct mount *mp; - struct fs *fs; - struct ucred *cred; + struct vnode **vpp; { struct componentname cnp; - struct jblocks *jblocks; struct vnode *dvp; - struct vnode *vp; - struct inode *ip; - ufs2_daddr_t blkno; ino_t sujournal; - int bcount; int error; - int i; - mp->mnt_kern_flag |= MNTK_SUJ; error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp); if (error) return (error); @@ -2086,13 +2075,35 @@ journal_mount(mp, fs, cred) cnp.cn_namelen = strlen(SUJ_FILE); error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal); vput(dvp); + if (error != 0) + return (error); + error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, vpp); + return (error); +} + +/* + * Open and verify the journal file. + */ +static int +journal_mount(mp, fs, cred) + struct mount *mp; + struct fs *fs; + struct ucred *cred; +{ + struct jblocks *jblocks; + struct vnode *vp; + struct inode *ip; + ufs2_daddr_t blkno; + int bcount; + int error; + int i; + + mp->mnt_kern_flag |= MNTK_SUJ; + error = softdep_journal_lookup(mp, &vp); if (error != 0) { printf("Failed to find journal. Use tunefs to create one\n"); return (error); } - error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, &vp); - if (error) - return (error); ip = VTOI(vp); if (ip->i_size < SUJ_MIN) { error = ENOSPC; @@ -2588,6 +2599,7 @@ softdep_process_journal(mp, flags) bp->b_bcount = size; bp->b_bufobj = &ump->um_devvp->v_bufobj; bp->b_flags &= ~B_INVAL; + bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY; /* * Initialize our jseg with cnt records. 
Assign the next * sequence number to it and link it in-order. Modified: projects/suj/head/sys/ufs/ffs/ffs_vfsops.c ============================================================================== --- projects/suj/head/sys/ufs/ffs/ffs_vfsops.c Fri Apr 23 08:49:38 2010 (r207105) +++ projects/suj/head/sys/ufs/ffs/ffs_vfsops.c Fri Apr 23 09:09:39 2010 (r207106) @@ -1948,6 +1948,7 @@ ffs_geom_strategy(struct bufobj *bo, str struct vnode *vp; int error; struct buf *tbp; + int nocopy; vp = bo->__bo_vnode; if (bp->b_iocmd == BIO_WRITE) { @@ -1955,8 +1956,9 @@ ffs_geom_strategy(struct bufobj *bo, str bp->b_vp != NULL && bp->b_vp->v_mount != NULL && (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) panic("ffs_geom_strategy: bad I/O"); - bp->b_flags &= ~B_VALIDSUSPWRT; - if ((vp->v_vflag & VV_COPYONWRITE) && + nocopy = bp->b_flags & B_NOCOPY; + bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY); + if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 && vp->v_rdev->si_snapdata != NULL) { if ((bp->b_flags & B_CLUSTER) != 0) { runningbufwakeup(bp);