From owner-svn-src-all@FreeBSD.ORG Tue Jan 22 07:40:38 2013 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by hub.freebsd.org (Postfix) with ESMTP id D6E27D1A; Tue, 22 Jan 2013 07:40:38 +0000 (UTC) (envelope-from scottl@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id C85A4A3B; Tue, 22 Jan 2013 07:40:38 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id r0M7eckH060797; Tue, 22 Jan 2013 07:40:38 GMT (envelope-from scottl@svn.freebsd.org) Received: (from scottl@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id r0M7ecXj060796; Tue, 22 Jan 2013 07:40:38 GMT (envelope-from scottl@svn.freebsd.org) Message-Id: <201301220740.r0M7ecXj060796@svn.freebsd.org> From: Scott Long Date: Tue, 22 Jan 2013 07:40:38 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r245779 - stable/9/sys/ufs/ffs X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 22 Jan 2013 07:40:38 -0000 Author: scottl Date: Tue Jan 22 07:40:38 2013 New Revision: 245779 URL: http://svnweb.freebsd.org/changeset/base/245779 Log: MFC r243018: - Fix a truncation bug with softdep journaling that could leak blocks on crash. When truncating a file that never made it to disk we use the canceled allocation dependencies to hold the journal records until the truncation completes. Previously allocdirect dependencies on the id_bufwait list were not considered and their journal space could expire before the bitmaps were written. Cancel them and attach them to the freeblks as we do for other allocdirects. - Add KTR traces that were used to debug this problem. - When adding jsegdeps, always use jwork_insert() so we don't have more than one segdep on a given jwork list. Modified: stable/9/sys/ufs/ffs/ffs_softdep.c Directory Properties: stable/9/sys/ (props changed) Modified: stable/9/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- stable/9/sys/ufs/ffs/ffs_softdep.c Tue Jan 22 07:38:43 2013 (r245778) +++ stable/9/sys/ufs/ffs/ffs_softdep.c Tue Jan 22 07:40:38 2013 (r245779) @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -92,6 +93,8 @@ __FBSDID("$FreeBSD$"); #include +#define KTR_SUJ 0 /* Define to KTR_SPARE. */ + #ifndef SOFTUPDATES int @@ -769,6 +772,34 @@ struct pagedep_hashhead; struct bmsafemap_hashhead; /* + * Private journaling structures. + */ +struct jblocks { + struct jseglst jb_segs; /* TAILQ of current segments. */ + struct jseg *jb_writeseg; /* Next write to complete. */ + struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */ + struct jextent *jb_extent; /* Extent array. */ + uint64_t jb_nextseq; /* Next sequence number. */ + uint64_t jb_oldestwrseq; /* Oldest written sequence number. */ + uint8_t jb_needseg; /* Need a forced segment. */ + uint8_t jb_suspended; /* Did journal suspend writes? */ + int jb_avail; /* Available extents. */ + int jb_used; /* Last used extent. */ + int jb_head; /* Allocator head. */ + int jb_off; /* Allocator extent offset. */ + int jb_blocks; /* Total disk blocks covered. */ + int jb_free; /* Total disk blocks free. */ + int jb_min; /* Minimum free space. */ + int jb_low; /* Low on space. */ + int jb_age; /* Insertion time of oldest rec. */ +}; + +struct jextent { + ufs2_daddr_t je_daddr; /* Disk block address. */ + int je_blocks; /* Disk block count. */ +}; + +/* * Internal function prototypes. */ static void softdep_error(char *, int); @@ -2273,19 +2304,15 @@ static void indirblk_insert(freework) struct freework *freework; { - struct freeblks *freeblks; - struct jsegdep *jsegdep; - struct worklist *wk; + struct jblocks *jblocks; + struct jseg *jseg; - freeblks = freework->fw_freeblks; - LIST_FOREACH(wk, &freeblks->fb_jwork, wk_list) - if (wk->wk_type == D_JSEGDEP) - break; - if (wk == NULL) + jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks; + jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst); + if (jseg == NULL) return; - jsegdep = WK_JSEGDEP(wk); - LIST_INSERT_HEAD(&jsegdep->jd_seg->js_indirs, freework, fw_segs); + LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs); TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp, freework->fw_blkno), freework, fw_next); freework->fw_state &= ~DEPCOMPLETE; @@ -2438,31 +2465,6 @@ softdep_unmount(mp) journal_unmount(mp); } -struct jblocks { - struct jseglst jb_segs; /* TAILQ of current segments. */ - struct jseg *jb_writeseg; /* Next write to complete. */ - struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */ - struct jextent *jb_extent; /* Extent array. */ - uint64_t jb_nextseq; /* Next sequence number. */ - uint64_t jb_oldestwrseq; /* Oldest written sequence number. */ - uint8_t jb_needseg; /* Need a forced segment. */ - uint8_t jb_suspended; /* Did journal suspend writes? */ - int jb_avail; /* Available extents. */ - int jb_used; /* Last used extent. */ - int jb_head; /* Allocator head. */ - int jb_off; /* Allocator extent offset. */ - int jb_blocks; /* Total disk blocks covered. */ - int jb_free; /* Total disk blocks free. */ - int jb_min; /* Minimum free space. */ - int jb_low; /* Low on space. */ - int jb_age; /* Insertion time of oldest rec. */ -}; - -struct jextent { - ufs2_daddr_t je_daddr; /* Disk block address. */ - int je_blocks; /* Disk block count. */ -}; - static struct jblocks * jblocks_create(void) { @@ -3668,7 +3670,7 @@ handle_written_jnewblk(jnewblk) */ freefrag = WK_FREEFRAG(jnewblk->jn_dep); freefrag->ff_jdep = NULL; - WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list); + jwork_insert(&freefrag->ff_jwork, jsegdep); break; case D_FREEWORK: /* @@ -3676,8 +3678,7 @@ handle_written_jnewblk(jnewblk) */ freework = WK_FREEWORK(jnewblk->jn_dep); freework->fw_jnewblk = NULL; - WORKLIST_INSERT(&freework->fw_freeblks->fb_jwork, - &jsegdep->jd_list); + jwork_insert(&freework->fw_freeblks->fb_jwork, jsegdep); break; default: panic("handle_written_jnewblk: Unknown type %d.", @@ -3707,6 +3708,7 @@ cancel_jfreefrag(jfreefrag) jfreefrag->fr_freefrag = NULL; free_jfreefrag(jfreefrag); freefrag->ff_state |= DEPCOMPLETE; + CTR1(KTR_SUJ, "cancel_jfreefrag: blkno %jd", freefrag->ff_blkno); } /* @@ -3770,7 +3772,7 @@ handle_written_jblkdep(jblkdep) jblkdep->jb_jsegdep = NULL; freeblks = jblkdep->jb_freeblks; LIST_REMOVE(jblkdep, jb_deps); - WORKLIST_INSERT(&freeblks->fb_jwork, &jsegdep->jd_list); + jwork_insert(&freeblks->fb_jwork, jsegdep); /* * If the freeblks is all journaled, we can add it to the worklist. */ @@ -3973,6 +3975,7 @@ cancel_jfreeblk(freeblks, blkno) } if (jblkdep == NULL) return; + CTR1(KTR_SUJ, "cancel_jfreeblk: blkno %jd", blkno); free_jsegdep(jblkdep->jb_jsegdep); LIST_REMOVE(jblkdep, jb_deps); WORKITEM_FREE(jfreeblk, D_JFREEBLK); @@ -4213,6 +4216,7 @@ cancel_jnewblk(jnewblk, wkhd) { struct jsegdep *jsegdep; + CTR1(KTR_SUJ, "cancel_jnewblk: blkno %jd", jnewblk->jn_blkno); jsegdep = jnewblk->jn_jsegdep; if (jnewblk->jn_jsegdep == NULL || jnewblk->jn_dep == NULL) panic("cancel_jnewblk: Invalid state"); @@ -4904,6 +4908,10 @@ softdep_setup_blkmapdep(bp, mp, newblkno } #endif } + + CTR3(KTR_SUJ, + "softdep_setup_blkmapdep: blkno %jd frags %d oldfrags %d", + newblkno, frags, oldfrags); ACQUIRE_LOCK(&lk); if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); @@ -5065,6 +5073,10 @@ softdep_setup_allocdirect(ip, off, newbl else freefrag = NULL; + CTR6(KTR_SUJ, + "softdep_setup_allocdirect: ino %d blkno %jd oldblkno %jd " + "off %jd newsize %ld oldsize %d", + ip->i_number, newblkno, oldblkno, off, newsize, oldsize); ACQUIRE_LOCK(&lk); if (off >= NDADDR) { if (lbn > 0) @@ -5343,6 +5355,8 @@ newfreefrag(ip, blkno, size, lbn) struct freefrag *freefrag; struct fs *fs; + CTR4(KTR_SUJ, "newfreefrag: ino %d blkno %jd size %ld lbn %jd", + ip->i_number, blkno, size, lbn); fs = ip->i_fs; if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) panic("newfreefrag: frag size"); @@ -5378,6 +5392,9 @@ handle_workitem_freefrag(freefrag) struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp); struct workhead wkhd; + CTR3(KTR_SUJ, + "handle_workitem_freefrag: ino %d blkno %jd size %ld", + freefrag->ff_inum, freefrag->ff_blkno, freefrag->ff_fragsize); /* * It would be illegal to add new completion items to the * freefrag after it was schedule to be done so it must be @@ -5596,6 +5613,9 @@ softdep_setup_allocindir_page(ip, lbn, b if (lbn != nbp->b_lblkno) panic("softdep_setup_allocindir_page: lbn %jd != lblkno %jd", lbn, bp->b_lblkno); + CTR4(KTR_SUJ, + "softdep_setup_allocindir_page: ino %d blkno %jd oldblkno %jd " + "lbn %jd", ip->i_number, newblkno, oldblkno, lbn); ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page"); mp = UFSTOVFS(ip->i_ump); aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn); @@ -5634,6 +5654,9 @@ softdep_setup_allocindir_meta(nbp, ip, b ufs_lbn_t lbn; int dflags; + CTR3(KTR_SUJ, + "softdep_setup_allocindir_meta: ino %d blkno %jd ptrno %d", + ip->i_number, newblkno, ptrno); lbn = nbp->b_lblkno; ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta"); aip = newallocindir(ip, ptrno, newblkno, 0, lbn); @@ -6238,6 +6261,7 @@ softdep_journal_freeblocks(ip, cred, len int flags; /* IO_EXT and/or IO_NORMAL */ { struct freeblks *freeblks, *fbn; + struct worklist *wk, *wkn; struct inodedep *inodedep; struct jblkdep *jblkdep; struct allocdirect *adp, *adpn; @@ -6272,6 +6296,8 @@ softdep_journal_freeblocks(ip, cred, len if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED && length == 0) needj = 0; + CTR3(KTR_SUJ, "softdep_journal_freeblks: ip %d length %ld needj %d", + ip->i_number, length, needj); FREE_LOCK(&lk); /* * Calculate the lbn that we are truncating to. This results in -1 @@ -6425,6 +6451,21 @@ softdep_journal_freeblocks(ip, cred, len cancel_allocdirect(&inodedep->id_extupdt, adp, freeblks); /* + * Scan the bufwait list for newblock dependencies that will never + * make it to disk. + */ + LIST_FOREACH_SAFE(wk, &inodedep->id_bufwait, wk_list, wkn) { + if (wk->wk_type != D_ALLOCDIRECT) + continue; + adp = WK_ALLOCDIRECT(wk); + if (((flags & IO_NORMAL) != 0 && (adp->ad_offset > iboff)) || + ((flags & IO_EXT) != 0 && (adp->ad_state & EXTDATA))) { + cancel_jfreeblk(freeblks, adp->ad_newblkno); + cancel_newblk(WK_NEWBLK(wk), NULL, &freeblks->fb_jwork); + WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk); + } + } + /* * Add journal work. */ LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) @@ -6563,6 +6604,8 @@ softdep_setup_freeblocks(ip, length, fla ufs_lbn_t tmpval; ufs_lbn_t lbn; + CTR2(KTR_SUJ, "softdep_setup_freeblks: ip %d length %ld", + ip->i_number, length); fs = ip->i_fs; mp = UFSTOVFS(ip->i_ump); if (length != 0) @@ -7088,6 +7131,8 @@ cancel_newblk(newblk, wk, wkhd) { struct jnewblk *jnewblk; + CTR1(KTR_SUJ, "cancel_newblk: blkno %jd", newblk->nb_newblkno); + newblk->nb_state |= GOINGAWAY; /* * Previously we traversed the completedhd on each indirdep @@ -7456,6 +7501,9 @@ freework_freeblock(freework) } FREE_LOCK(&lk); freeblks_free(ump, freeblks, btodb(bsize)); + CTR4(KTR_SUJ, + "freework_freeblock: ino %d blkno %jd lbn %jd size %ld", + freeblks->fb_inum, freework->fw_blkno, freework->fw_lbn, bsize); ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, freeblks->fb_inum, freeblks->fb_vtype, &wkhd); ACQUIRE_LOCK(&lk); @@ -7889,6 +7937,9 @@ indir_trunc(freework, dbn, lbn) &freedep->fd_list); freedeps++; } + CTR3(KTR_SUJ, + "indir_trunc: ino %d blkno %jd size %ld", + freeblks->fb_inum, nb, fs->fs_bsize); ffs_blkfree(ump, fs, freeblks->fb_devvp, nb, fs->fs_bsize, freeblks->fb_inum, freeblks->fb_vtype, &wkhd); @@ -7924,6 +7975,9 @@ indir_trunc(freework, dbn, lbn) * If we're not journaling we can free the indirect now. */ dbn = dbtofsb(fs, dbn); + CTR3(KTR_SUJ, + "indir_trunc 2: ino %d blkno %jd size %ld", + freeblks->fb_inum, dbn, fs->fs_bsize); ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize, freeblks->fb_inum, freeblks->fb_vtype, NULL); /* Non SUJ softdep does single-threaded truncations. */ @@ -10359,6 +10413,10 @@ softdep_setup_blkfree(mp, bp, blkno, fra int i; #endif + CTR3(KTR_SUJ, + "softdep_setup_blkfree: blkno %jd frags %d wk head %p", + blkno, frags, wkhd); + ACQUIRE_LOCK(&lk); /* Lookup the bmsafemap so we track when it is dirty. */ fs = VFSTOUFS(mp)->um_fs; @@ -10370,6 +10428,9 @@ softdep_setup_blkfree(mp, bp, blkno, fra */ if (wkhd) { while ((wk = LIST_FIRST(wkhd)) != NULL) { + CTR2(KTR_SUJ, + "softdep_setup_blkfree: blkno %jd wk type %d", + blkno, wk->wk_type); WORKLIST_REMOVE(wk); if (wk->wk_type != D_JNEWBLK) { WORKLIST_INSERT(&bmsafemap->sm_freehd, wk);