Date: Sun, 12 Jun 2011 19:27:05 +0000 (UTC)
From: Kirk McKusick <mckusick@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r223020 - in head/sys/ufs: ffs ufs
Message-ID: <201106121927.p5CJR5Wi038037@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mckusick Date: Sun Jun 12 19:27:05 2011 New Revision: 223020 URL: http://svn.freebsd.org/changeset/base/223020 Log: Update to soft updates journaling to properly track freed blocks that get claimed by snapshots. Submitted by: Jeff Roberson Tested by: Peter Holm Modified: head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_snapshot.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ufs/ufs_vnops.c Modified: head/sys/ufs/ffs/ffs_alloc.c ============================================================================== --- head/sys/ufs/ffs/ffs_alloc.c Sun Jun 12 18:52:39 2011 (r223019) +++ head/sys/ufs/ffs/ffs_alloc.c Sun Jun 12 19:27:05 2011 (r223020) @@ -2035,7 +2035,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, i */ if (devvp->v_type != VREG && (devvp->v_vflag & VV_COPYONWRITE) && - ffs_snapblkfree(fs, devvp, bno, size, inum)) { + ffs_snapblkfree(fs, devvp, bno, size, inum, dephd)) { return; } if (!ump->um_candelete) { Modified: head/sys/ufs/ffs/ffs_extern.h ============================================================================== --- head/sys/ufs/ffs/ffs_extern.h Sun Jun 12 18:52:39 2011 (r223019) +++ head/sys/ufs/ffs/ffs_extern.h Sun Jun 12 19:27:05 2011 (r223020) @@ -80,12 +80,14 @@ int ffs_realloccg(struct inode *, ufs2_d ufs2_daddr_t, int, int, int, struct ucred *, struct buf **); int ffs_sbupdate(struct ufsmount *, int, int); void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); -int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t); +int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t, + struct workhead *); void ffs_snapremove(struct vnode *vp); int ffs_snapshot(struct mount *mp, char *snapfile); void ffs_snapshot_mount(struct mount *mp); void ffs_snapshot_unmount(struct mount *mp); void process_deferred_inactive(struct mount *mp); +void ffs_sync_snap(struct mount *, int); int ffs_syncvnode(struct vnode *vp, int waitfor); int ffs_truncate(struct vnode *, off_t, int, struct ucred *, 
struct thread *); int ffs_update(struct vnode *, int); @@ -149,6 +151,9 @@ int softdep_prealloc(struct vnode *, int int softdep_journal_lookup(struct mount *, struct vnode **); void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int); void softdep_journal_fsync(struct inode *); +void softdep_buf_append(struct buf *, struct workhead *); +void softdep_inode_append(struct inode *, struct ucred *, struct workhead *); +void softdep_freework(struct workhead *); /* @@ -161,4 +166,14 @@ void softdep_journal_fsync(struct inode int ffs_rdonly(struct inode *); +TAILQ_HEAD(snaphead, inode); + +struct snapdata { + LIST_ENTRY(snapdata) sn_link; + struct snaphead sn_head; + daddr_t sn_listsize; + daddr_t *sn_blklist; + struct lock sn_lock; +}; + #endif /* !_UFS_FFS_EXTERN_H */ Modified: head/sys/ufs/ffs/ffs_snapshot.c ============================================================================== --- head/sys/ufs/ffs/ffs_snapshot.c Sun Jun 12 18:52:39 2011 (r223019) +++ head/sys/ufs/ffs/ffs_snapshot.c Sun Jun 12 19:27:05 2011 (r223020) @@ -81,12 +81,13 @@ ffs_snapshot(mp, snapfile) } int -ffs_snapblkfree(fs, devvp, bno, size, inum) +ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd) struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + struct workhead *wkhd; { return (EINVAL); } @@ -123,19 +124,16 @@ ffs_copyonwrite(devvp, bp) return (EINVAL); } +void +ffs_sync_snap(mp, waitfor) + struct mount *mp; + int waitfor; +{ +} + #else FEATURE(ffs_snapshot, "FFS snapshot support"); -TAILQ_HEAD(snaphead, inode); - -struct snapdata { - LIST_ENTRY(snapdata) sn_link; - struct snaphead sn_head; - daddr_t sn_listsize; - daddr_t *sn_blklist; - struct lock sn_lock; -}; - LIST_HEAD(, snapdata) snapfree; static struct mtx snapfree_lock; MTX_SYSINIT(ffs_snapfree, &snapfree_lock, "snapdata free list", MTX_DEF); @@ -1635,7 +1633,7 @@ ffs_snapremove(vp) DIP_SET(ip, i_db[blkno], 0); else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, 
dblk, fs->fs_bsize, - ip->i_number))) { + ip->i_number, NULL))) { DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - btodb(fs->fs_bsize)); DIP_SET(ip, i_db[blkno], 0); @@ -1660,7 +1658,7 @@ ffs_snapremove(vp) ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, dblk, - fs->fs_bsize, ip->i_number))) { + fs->fs_bsize, ip->i_number, NULL))) { ip->i_din1->di_blocks -= btodb(fs->fs_bsize); ((ufs1_daddr_t *)(ibp->b_data))[loc]= 0; @@ -1674,7 +1672,7 @@ ffs_snapremove(vp) ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; else if ((dblk == blkstofrags(fs, blkno) && ffs_snapblkfree(fs, ip->i_devvp, dblk, - fs->fs_bsize, ip->i_number))) { + fs->fs_bsize, ip->i_number, NULL))) { ip->i_din2->di_blocks -= btodb(fs->fs_bsize); ((ufs2_daddr_t *)(ibp->b_data))[loc] = 0; } @@ -1722,12 +1720,13 @@ ffs_snapremove(vp) * must always have been allocated from a BLK_NOCOPY location. */ int -ffs_snapblkfree(fs, devvp, bno, size, inum) +ffs_snapblkfree(fs, devvp, bno, size, inum, wkhd) struct fs *fs; struct vnode *devvp; ufs2_daddr_t bno; long size; ino_t inum; + struct workhead *wkhd; { struct buf *ibp, *cbp, *savedcbp = 0; struct thread *td = curthread; @@ -1825,6 +1824,17 @@ retry: "Grabonremove: snapino", ip->i_number, (intmax_t)lbn, inum); #endif + /* + * If journaling is tracking this write we must add + * the work to the inode or indirect being written. + */ + if (wkhd != NULL) { + if (lbn < NDADDR) + softdep_inode_append(ip, + curthread->td_ucred, wkhd); + else + softdep_buf_append(ibp, wkhd); + } if (lbn < NDADDR) { DIP_SET(ip, i_db[lbn], bno); } else if (ip->i_ump->um_fstype == UFS1) { @@ -1902,6 +1912,8 @@ retry: * not be freed. Although space will be lost, the snapshot * will stay consistent. 
*/ + if (error != 0 && wkhd != NULL) + softdep_freework(wkhd); lockmgr(vp->v_vnlock, LK_RELEASE, NULL); return (error); } @@ -2400,6 +2412,42 @@ ffs_copyonwrite(devvp, bp) } /* + * sync snapshots to force freework records waiting on snapshots to claim + * blocks to free. + */ +void +ffs_sync_snap(mp, waitfor) + struct mount *mp; + int waitfor; +{ + struct snapdata *sn; + struct vnode *devvp; + struct vnode *vp; + struct inode *ip; + + devvp = VFSTOUFS(mp)->um_devvp; + if ((devvp->v_vflag & VV_COPYONWRITE) == 0) + return; + for (;;) { + VI_LOCK(devvp); + sn = devvp->v_rdev->si_snapdata; + if (sn == NULL) { + VI_UNLOCK(devvp); + return; + } + if (lockmgr(&sn->sn_lock, + LK_INTERLOCK | LK_EXCLUSIVE | LK_SLEEPFAIL, + VI_MTX(devvp)) == 0) + break; + } + TAILQ_FOREACH(ip, &sn->sn_head, i_nextsnap) { + vp = ITOV(ip); + ffs_syncvnode(vp, waitfor); + } + lockmgr(&sn->sn_lock, LK_RELEASE, NULL); +} + +/* * Read the specified block into the given buffer. * Much of this boiler-plate comes from bwrite(). 
*/ Modified: head/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- head/sys/ufs/ffs/ffs_softdep.c Sun Jun 12 18:52:39 2011 (r223019) +++ head/sys/ufs/ffs/ffs_softdep.c Sun Jun 12 19:27:05 2011 (r223020) @@ -584,6 +584,33 @@ softdep_get_depcounts(struct mount *mp, *softdepactiveaccp = 0; } +void +softdep_buf_append(bp, wkhd) + struct buf *bp; + struct workhead *wkhd; +{ + + panic("softdep_buf_appendwork called"); +} + +void +softdep_inode_append(ip, cred, wkhd) + struct inode *ip; + struct ucred *cred; + struct workhead *wkhd; +{ + + panic("softdep_inode_appendwork called"); +} + +void +softdep_freework(wkhd) + struct workhead *wkhd; +{ + + panic("softdep_freework called"); +} + #else FEATURE(softupdates, "FFS soft-updates support"); @@ -867,7 +894,7 @@ static void freework_enqueue(struct free static int handle_workitem_freeblocks(struct freeblks *, int); static int handle_complete_freeblocks(struct freeblks *, int); static void handle_workitem_indirblk(struct freework *); -static void handle_written_freework(struct freework *, int); +static void handle_written_freework(struct freework *); static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); static struct worklist *jnewblk_merge(struct worklist *, struct worklist *, struct workhead *); @@ -1632,6 +1659,7 @@ process_truncates(vp) if (cgwait) { FREE_LOCK(&lk); sync_cgs(mp, MNT_WAIT); + ffs_sync_snap(mp, MNT_WAIT); ACQUIRE_LOCK(&lk); continue; } @@ -5922,7 +5950,7 @@ complete_trunc_indir(freework) */ if (bp == NULL) { if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd)) - handle_written_freework(freework, 0); + handle_written_freework(freework); else WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd, &freework->fw_list); @@ -5974,7 +6002,7 @@ out: */ if (totblks > datablocks) return (0); - return (totblks - datablocks); + return (datablocks - totblks); } /* @@ -7228,6 +7256,7 @@ freework_freeblock(freework) 
cancel_jnewblk(jnewblk, &wkhd); needj = 0; } else if (needj) { + freework->fw_state |= DELAYEDFREE; freeblks->fb_cgwait++; WORKLIST_INSERT(&wkhd, &freework->fw_list); } @@ -7241,7 +7270,7 @@ freework_freeblock(freework) * made it to disk. We can immediately free the freeblk. */ if (needj == 0) - handle_written_freework(freework, 0); + handle_written_freework(freework); } /* @@ -7256,7 +7285,8 @@ freework_enqueue(freework) struct freeblks *freeblks; freeblks = freework->fw_freeblks; - WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); + if ((freework->fw_state & INPROGRESS) == 0) + WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list); if ((freeblks->fb_state & (ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE && LIST_EMPTY(&freeblks->fb_jblkdephd)) @@ -7282,13 +7312,14 @@ handle_workitem_indirblk(freework) ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; if (freework->fw_state & DEPCOMPLETE) { - handle_written_freework(freework, 0); + handle_written_freework(freework); return; } if (freework->fw_off == NINDIR(fs)) { freework_freeblock(freework); return; } + freework->fw_state |= INPROGRESS; FREE_LOCK(&lk); indir_trunc(freework, fsbtodb(fs, freework->fw_blkno), freework->fw_lbn); @@ -7301,16 +7332,16 @@ handle_workitem_indirblk(freework) * the freeblks is added back to the worklist if there is more work to do. */ static void -handle_written_freework(freework, cgwrite) +handle_written_freework(freework) struct freework *freework; - int cgwrite; { struct freeblks *freeblks; struct freework *parent; freeblks = freework->fw_freeblks; parent = freework->fw_parent; - freeblks->fb_cgwait -= cgwrite; + if (freework->fw_state & DELAYEDFREE) + freeblks->fb_cgwait--; freework->fw_state |= COMPLETE; if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE) WORKITEM_FREE(freework, D_FREEWORK); @@ -7552,6 +7583,8 @@ indir_trunc(freework, dbn, lbn) return; } ACQUIRE_LOCK(&lk); + /* Protects against a race with complete_trunc_indir(). 
*/ + freework->fw_state &= ~INPROGRESS; /* * If we have an indirdep we need to enforce the truncation order * and discard it when it is complete. @@ -7675,7 +7708,7 @@ indir_trunc(freework, dbn, lbn) if (freework->fw_blkno == dbn) { freework->fw_state |= ALLCOMPLETE; ACQUIRE_LOCK(&lk); - handle_written_freework(freework, 0); + handle_written_freework(freework); FREE_LOCK(&lk); } return; @@ -10368,8 +10401,7 @@ softdep_disk_write_complete(bp) continue; case D_FREEWORK: - /* Freework on an indirect block, not bmsafemap. */ - handle_written_freework(WK_FREEWORK(wk), 0); + handle_written_freework(WK_FREEWORK(wk)); break; case D_JSEGDEP: @@ -10540,7 +10572,7 @@ handle_jwork(wkhd) free_freedep(WK_FREEDEP(wk)); continue; case D_FREEWORK: - handle_written_freework(WK_FREEWORK(wk), 1); + handle_written_freework(WK_FREEWORK(wk)); continue; default: panic("handle_jwork: Unknown type %s\n", @@ -12738,6 +12770,53 @@ clear_inodedeps(td) } } +void +softdep_buf_append(bp, wkhd) + struct buf *bp; + struct workhead *wkhd; +{ + struct worklist *wk; + + ACQUIRE_LOCK(&lk); + while ((wk = LIST_FIRST(wkhd)) != NULL) { + WORKLIST_REMOVE(wk); + WORKLIST_INSERT(&bp->b_dep, wk); + } + FREE_LOCK(&lk); + +} + +void +softdep_inode_append(ip, cred, wkhd) + struct inode *ip; + struct ucred *cred; + struct workhead *wkhd; +{ + struct buf *bp; + struct fs *fs; + int error; + + fs = ip->i_fs; + error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, cred, &bp); + if (error) { + softdep_freework(wkhd); + return; + } + softdep_buf_append(bp, wkhd); + bqrelse(bp); +} + +void +softdep_freework(wkhd) + struct workhead *wkhd; +{ + + ACQUIRE_LOCK(&lk); + handle_jwork(wkhd); + FREE_LOCK(&lk); +} + /* * Function to determine if the buffer has outstanding dependencies * that will cause a roll-back if the buffer is written. 
If wantcount Modified: head/sys/ufs/ufs/ufs_vnops.c ============================================================================== --- head/sys/ufs/ufs/ufs_vnops.c Sun Jun 12 18:52:39 2011 (r223019) +++ head/sys/ufs/ufs/ufs_vnops.c Sun Jun 12 19:27:05 2011 (r223020) @@ -1838,6 +1838,8 @@ ufs_mkdir(ap) #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); return (error); @@ -1850,6 +1852,8 @@ ufs_mkdir(ap) #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); return (error); @@ -2608,6 +2612,8 @@ ufs_makeinode(mode, dvp, vpp, cnp) #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); return (error); @@ -2620,6 +2626,8 @@ ufs_makeinode(mode, dvp, vpp, cnp) #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { + if (DOINGSOFTDEP(tvp)) + softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); return (error);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201106121927.p5CJR5Wi038037>