From owner-svn-src-all@FreeBSD.ORG Wed Jun 15 23:19:10 2011 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 0ACA31065670; Wed, 15 Jun 2011 23:19:10 +0000 (UTC) (envelope-from mckusick@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id ED4878FC16; Wed, 15 Jun 2011 23:19:09 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id p5FNJ9oi002533; Wed, 15 Jun 2011 23:19:09 GMT (envelope-from mckusick@svn.freebsd.org) Received: (from mckusick@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id p5FNJ9Yf002525; Wed, 15 Jun 2011 23:19:09 GMT (envelope-from mckusick@svn.freebsd.org) Message-Id: <201106152319.p5FNJ9Yf002525@svn.freebsd.org> From: Kirk McKusick Date: Wed, 15 Jun 2011 23:19:09 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r223127 - head/sys/ufs/ffs X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 15 Jun 2011 23:19:10 -0000 Author: mckusick Date: Wed Jun 15 23:19:09 2011 New Revision: 223127 URL: http://svn.freebsd.org/changeset/base/223127 Log: Ensure that filesystem metadata contained within persistent snapshots is always kept consistent. Suggested by: Jeff Roberson Modified: head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_balloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_snapshot.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_alloc.c ============================================================================== --- head/sys/ufs/ffs/ffs_alloc.c Wed Jun 15 22:41:55 2011 (r223126) +++ head/sys/ufs/ffs/ffs_alloc.c Wed Jun 15 23:19:09 2011 (r223127) @@ -390,7 +390,7 @@ retry: bp->b_blkno = fsbtodb(fs, bno); if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize, - ip->i_number, NULL); + ip->i_number, vp->v_type, NULL); delta = btodb(nsize - osize); DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) @@ -670,7 +670,7 @@ ffs_reallocblks_ufs1(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number, NULL); + fs->fs_bsize, ip->i_number, vp->v_type, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -878,7 +878,7 @@ ffs_reallocblks_ufs2(ap) if (!DOINGSOFTDEP(vp)) ffs_blkfree(ump, fs, ip->i_devvp, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize, ip->i_number, NULL); + fs->fs_bsize, ip->i_number, vp->v_type, NULL); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef INVARIANTS if (!ffs_checkblk(ip, @@ -1880,7 +1880,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n", devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, size, fs->fs_fsmnt); - panic("ffs_blkfree: bad size"); + panic("ffs_blkfree_cg: bad size"); } #endif if ((u_int)bno >= fs->fs_size) { @@ -1914,7 +1914,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size } printf("dev = %s, block = %jd, fs = %s\n", devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); - panic("ffs_blkfree: freeing free block"); + panic("ffs_blkfree_cg: freeing free block"); } ffs_setblock(fs, blksfree, fragno); ffs_clusteracct(fs, cgp, fragno, 1); @@ -1937,7 +1937,7 @@ ffs_blkfree_cg(ump, fs, devvp, bno, size printf("dev = %s, block = %jd, fs = %s\n", devtoname(dev), (intmax_t)(bno + i), fs->fs_fsmnt); - panic("ffs_blkfree: freeing free frag"); + panic("ffs_blkfree_cg: freeing free frag"); } setbit(blksfree, cgbno + i); } @@ -2013,13 +2013,14 @@ ffs_blkfree_trim_completed(bip) } void -ffs_blkfree(ump, fs, devvp, bno, size, inum, dephd) +ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd) struct ufsmount *ump; struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + enum vtype vtype; struct workhead *dephd; { struct mount *mp; @@ -2034,7 +2035,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, i */ if (devvp->v_type != VREG && (devvp->v_vflag & VV_COPYONWRITE) && - ffs_snapblkfree(fs, devvp, bno, size, inum, dephd)) { + ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, dephd)) { return; } if (!ump->um_candelete) { @@ -2571,7 +2572,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) if (blksize > blkcnt) blksize = blkcnt; ffs_blkfree(ump, fs, ump->um_devvp, blkno, - blksize * fs->fs_fsize, ROOTINO, NULL); + blksize * fs->fs_fsize, ROOTINO, VDIR, NULL); blkno += blksize; blkcnt -= blksize; blksize = fs->fs_frag; Modified: head/sys/ufs/ffs/ffs_balloc.c ============================================================================== --- head/sys/ufs/ffs/ffs_balloc.c Wed Jun 15 22:41:55 2011 (r223126) +++ head/sys/ufs/ffs/ffs_balloc.c Wed Jun 15 23:19:09 2011 (r223127) @@ -506,7 +506,7 @@ fail: */ for (blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, - ip->i_number, NULL); + ip->i_number, vp->v_type, NULL); } return (error); } @@ -1052,7 +1052,7 @@ fail: */ for (blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ump, fs, ip->i_devvp, *blkp, fs->fs_bsize, - ip->i_number, NULL); + ip->i_number, vp->v_type, NULL); } return (error); } Modified: head/sys/ufs/ffs/ffs_extern.h ============================================================================== --- head/sys/ufs/ffs/ffs_extern.h Wed Jun 15 22:41:55 2011 (r223126) +++ head/sys/ufs/ffs/ffs_extern.h Wed Jun 15 23:19:09 2011 (r223127) @@ -57,7 +57,7 @@ int ffs_balloc_ufs2(struct vnode *a_vp, struct ucred *a_cred, int a_flags, struct buf **a_bpp); int ffs_blkatoff(struct vnode *, off_t, char **, struct buf **); void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, - ufs2_daddr_t, long, ino_t, struct workhead *); + ufs2_daddr_t, long, ino_t, enum vtype, struct workhead *); ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); @@ -82,7 +82,7 @@ int ffs_realloccg(struct inode *, ufs2_d int ffs_sbupdate(struct ufsmount *, int, int); void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t, - struct workhead *); + enum vtype, struct workhead *); void ffs_snapremove(struct vnode *vp); int ffs_snapshot(struct mount *mp, char *snapfile); void ffs_snapshot_mount(struct mount *mp); Modified: head/sys/ufs/ffs/ffs_inode.c ============================================================================== --- head/sys/ufs/ffs/ffs_inode.c Wed Jun 15 22:41:55 2011 (r223126) +++ head/sys/ufs/ffs/ffs_inode.c Wed Jun 15 23:19:09 2011 (r223127) @@ -235,7 +235,8 @@ ffs_truncate(vp, length, flags, cred, td if (oldblks[i] == 0) continue; ffs_blkfree(ump, fs, ip->i_devvp, oldblks[i], - sblksize(fs, osize, i), ip->i_number, NULL); + sblksize(fs, osize, i), ip->i_number, + vp->v_type, NULL); } } } @@ -435,7 +436,8 @@ ffs_truncate(vp, length, flags, cred, td if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); ffs_blkfree(ump, fs, ip->i_devvp, bn, - fs->fs_bsize, ip->i_number, NULL); + fs->fs_bsize, ip->i_number, + vp->v_type, NULL); blocksreleased += nblocks; } } @@ -455,7 +457,7 @@ ffs_truncate(vp, length, flags, cred, td DIP_SET(ip, i_db[i], 0); bsize = blksize(fs, ip, i); ffs_blkfree(ump, fs, ip->i_devvp, bn, bsize, ip->i_number, - NULL); + vp->v_type, NULL); blocksreleased += btodb(bsize); } if (lastblock < 0) @@ -487,7 +489,7 @@ ffs_truncate(vp, length, flags, cred, td */ bn += numfrags(fs, newspace); ffs_blkfree(ump, fs, ip->i_devvp, bn, - oldspace - newspace, ip->i_number, NULL); + oldspace - newspace, ip->i_number, vp->v_type, NULL); blocksreleased += btodb(oldspace - newspace); } } @@ -634,7 +636,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, lev blocksreleased += blkcount; } ffs_blkfree(ip->i_ump, fs, ip->i_devvp, nb, fs->fs_bsize, - ip->i_number, NULL); + ip->i_number, vp->v_type, NULL); blocksreleased += nblocks; } Modified: head/sys/ufs/ffs/ffs_snapshot.c ============================================================================== --- head/sys/ufs/ffs/ffs_snapshot.c Wed Jun 15 22:41:55 2011 (r223126) +++ head/sys/ufs/ffs/ffs_snapshot.c Wed Jun 15 23:19:09 2011 (r223127) @@ -81,12 +81,13 @@ ffs_snapshot(mp, snapfile) } int -ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd) +ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + enum vtype vtype; struct workhead *wkhd; { return (EINVAL); @@ -174,8 +175,10 @@ static int ffs_bp_snapblk(struct vnode * * To ensure the consistency of snapshots across crashes, we must * synchronously write out copied blocks before allowing the * originals to be modified. Because of the rather severe speed - * penalty that this imposes, the following flag allows this - * crash persistence to be disabled. + * penalty that this imposes, the code normally only ensures + * persistence for the filesystem metadata contained within a + * snapshot. Setting the following flag allows this crash + * persistence to be enabled for file contents. */ int dopersistence = 0; @@ -582,7 +585,7 @@ loop: if (len != 0 && len < fs->fs_bsize) { ffs_blkfree(ump, copy_fs, vp, DIP(xp, i_db[loc]), len, xp->i_number, - NULL); + xvp->v_type, NULL); blkno = DIP(xp, i_db[loc]); DIP_SET(xp, i_db[loc], 0); } @@ -1245,7 +1248,8 @@ mapacct_ufs1(vp, oldblkp, lastblkp, fs, *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); - ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); + ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, + vp->v_type, NULL); } return (0); } @@ -1528,7 +1532,8 @@ mapacct_ufs2(vp, oldblkp, lastblkp, fs, *ip->i_snapblklist++ = lblkno; if (blkno == BLK_SNAP) blkno = blkstofrags(fs, lblkno); - ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, NULL); + ffs_blkfree(ip->i_ump, fs, vp, blkno, fs->fs_bsize, inum, + vp->v_type, NULL); } return (0); } @@ -1633,7 +1638,7 @@ ffs_snapremove(vp) DIP_SET(ip, i_db[blkno], 0); else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, dblk, fs->fs_bsize, - ip->i_number, NULL))) { + ip->i_number, vp->v_type, NULL))) { DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - btodb(fs->fs_bsize)); DIP_SET(ip, i_db[blkno], 0); @@ -1658,7 +1663,8 @@ ffs_snapremove(vp) ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, dblk, - fs->fs_bsize, ip->i_number, NULL))) { + fs->fs_bsize, ip->i_number, vp->v_type, + NULL))) { ip->i_din1->di_blocks -= btodb(fs->fs_bsize); ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; @@ -1672,7 +1678,7 @@ ffs_snapremove(vp) ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, dblk, - fs->fs_bsize, ip->i_number, NULL))) { + fs->fs_bsize, ip->i_number, vp->v_type, NULL))) { ip->i_din2->di_blocks -= btodb(fs->fs_bsize); ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; } @@ -1720,12 +1726,13 @@ ffs_snapremove(vp) * must always have been allocated from a BLK_NOCOPY location. */ int -ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd) +ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, wkhd) struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + enum vtype vtype; struct workhead *wkhd; { struct buf *ibp, *cbp, *savedcbp = 0; @@ -1874,12 +1881,16 @@ retry: * simply copy them to the new block. Note that we need * to synchronously write snapshots that have not been * unlinked, and hence will be visible after a crash, - * to ensure their integrity. + * to ensure their integrity. At a minimum we ensure the + * integrity of the filesystem metadata, but use the + * dopersistence sysctl-setable flag to decide on the + * persistence needed for file content data. */ if (savedcbp != 0) { bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) + if ((vtype == VDIR || dopersistence) && + ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); continue; } @@ -1889,7 +1900,8 @@ retry: if ((error = readblock(vp, cbp, lbn)) != 0) { bzero(cbp->b_data, fs->fs_bsize); bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) + if ((vtype == VDIR || dopersistence) && + ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); break; } @@ -1898,12 +1910,15 @@ retry: /* * Note that we need to synchronously write snapshots that * have not been unlinked, and hence will be visible after - * a crash, to ensure their integrity. + * a crash, to ensure their integrity. At a minimum we + * ensure the integrity of the filesystem metadata, but + * use the dopersistence sysctl-setable flag to decide on + * the persistence needed for file content data. */ if (savedcbp) { vp = savedcbp->b_vp; bawrite(savedcbp); - if (dopersistence && VTOI(vp)->i_effnlink > 0) + if ((vtype == VDIR || dopersistence) && ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); } /* @@ -2358,12 +2373,16 @@ ffs_copyonwrite(devvp, bp) * simply copy them to the new block. Note that we need * to synchronously write snapshots that have not been * unlinked, and hence will be visible after a crash, - * to ensure their integrity. + * to ensure their integrity. At a minimum we ensure the + * integrity of the filesystem metadata, but use the + * dopersistence sysctl-setable flag to decide on the + * persistence needed for file content data. */ if (savedcbp != 0) { bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize); bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); else launched_async_io = 1; @@ -2375,7 +2394,8 @@ ffs_copyonwrite(devvp, bp) if ((error = readblock(vp, cbp, lbn)) != 0) { bzero(cbp->b_data, fs->fs_bsize); bawrite(cbp); - if (dopersistence && ip->i_effnlink > 0) + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && ip->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); else launched_async_io = 1; @@ -2386,12 +2406,16 @@ ffs_copyonwrite(devvp, bp) /* * Note that we need to synchronously write snapshots that * have not been unlinked, and hence will be visible after - * a crash, to ensure their integrity. + * a crash, to ensure their integrity. At a minimum we + * ensure the integrity of the filesystem metadata, but + * use the dopersistence sysctl-setable flag to decide on + * the persistence needed for file content data. */ if (savedcbp) { vp = savedcbp->b_vp; bawrite(savedcbp); - if (dopersistence && VTOI(vp)->i_effnlink > 0) + if ((devvp == bp->b_vp || bp->b_vp->v_type == VDIR || + dopersistence) && VTOI(vp)->i_effnlink > 0) (void) ffs_syncvnode(vp, MNT_WAIT); else launched_async_io = 1; Modified: head/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- head/sys/ufs/ffs/ffs_softdep.c Wed Jun 15 22:41:55 2011 (r223126) +++ head/sys/ufs/ffs/ffs_softdep.c Wed Jun 15 23:19:09 2011 (r223127) @@ -5172,6 +5172,7 @@ newfreefrag(ip, blkno, size, lbn) freefrag->ff_state = ATTACHED; LIST_INIT(&freefrag->ff_jwork); freefrag->ff_inum = ip->i_number; + freefrag->ff_vtype = ITOV(ip)->v_type; freefrag->ff_blkno = blkno; freefrag->ff_fragsize = size; @@ -5216,7 +5217,7 @@ handle_workitem_freefrag(freefrag) } FREE_LOCK(&lk); ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno, - freefrag->ff_fragsize, freefrag->ff_inum, &wkhd); + freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd); ACQUIRE_LOCK(&lk); WORKITEM_FREE(freefrag, D_FREEFRAG); FREE_LOCK(&lk); @@ -5724,6 +5725,7 @@ newfreeblks(mp, ip) freeblks->fb_state = ATTACHED; freeblks->fb_uid = ip->i_uid; freeblks->fb_inum = ip->i_number; + freeblks->fb_vtype = ITOV(ip)->v_type; freeblks->fb_modrev = DIP(ip, i_modrev); freeblks->fb_devvp = ip->i_devvp; freeblks->fb_chkcnt = 0; @@ -7263,7 +7265,7 @@ freework_freeblock(freework) freeblks->fb_freecnt += btodb(bsize); FREE_LOCK(&lk); ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, - freeblks->fb_inum, &wkhd); + freeblks->fb_inum, freeblks->fb_vtype, &wkhd); ACQUIRE_LOCK(&lk); /* * The jnewblk will be discarded and the bits in the map never @@ -7669,7 +7671,8 @@ indir_trunc(freework, dbn, lbn) freedeps++; } ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, - fs->fs_bsize, freeblks->fb_inum, &wkhd); + fs->fs_bsize, freeblks->fb_inum, + freeblks->fb_vtype, &wkhd); } } if (goingaway) { @@ -7702,7 +7705,7 @@ indir_trunc(freework, dbn, lbn) fs_pendingblocks += nblocks; dbn = dbtofsb(fs, dbn); ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, - freeblks->fb_inum, NULL); + freeblks->fb_inum, freeblks->fb_vtype, NULL); /* Non SUJ softdep does single-threaded truncations. */ freeblks->fb_freecnt += fs_pendingblocks; if (freework->fw_blkno == dbn) { Modified: head/sys/ufs/ffs/softdep.h ============================================================================== --- head/sys/ufs/ffs/softdep.h Wed Jun 15 22:41:55 2011 (r223126) +++ head/sys/ufs/ffs/softdep.h Wed Jun 15 23:19:09 2011 (r223127) @@ -511,6 +511,7 @@ struct freefrag { ufs2_daddr_t ff_blkno; /* fragment physical block number */ long ff_fragsize; /* size of fragment being deleted */ ino_t ff_inum; /* owning inode number */ + enum vtype ff_vtype; /* owning inode's file type */ }; /* @@ -538,6 +539,7 @@ struct freeblks { ufs2_daddr_t fb_chkcnt; /* Expected blks released. */ ufs2_daddr_t fb_freecnt; /* Actual blocks released. */ ino_t fb_inum; /* inode owner of blocks */ + enum vtype fb_vtype; /* inode owner's file type */ uid_t fb_uid; /* uid of previous owner of blocks */ int fb_ref; /* Children outstanding. */ int fb_cgwait; /* cg writes outstanding. */