Date: Mon, 25 Jan 2010 23:30:53 +0000 (UTC)
From: Jeff Roberson <jeff@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r202991 - in projects/suj: 6/sbin/fsck_ffs 6/sys/ufs/ffs 7/sbin/fsck_ffs 7/sbin/fsdb 7/sys/ufs/ffs 8/sbin/fsck_ffs 8/sbin/fsdb 8/sys/ufs/ffs
Message-ID: <201001252330.o0PNUrlq010722@svn.freebsd.org>
index | next in thread | raw e-mail
Author: jeff Date: Mon Jan 25 23:30:53 2010 New Revision: 202991 URL: http://svn.freebsd.org/changeset/base/202991 Log: - Merge r202989 and r202990 from suj/head Modified: projects/suj/6/sbin/fsck_ffs/fsck.h projects/suj/6/sbin/fsck_ffs/main.c projects/suj/6/sbin/fsck_ffs/suj.c projects/suj/6/sys/ufs/ffs/ffs_inode.c projects/suj/6/sys/ufs/ffs/ffs_softdep.c projects/suj/6/sys/ufs/ffs/ffs_vfsops.c projects/suj/6/sys/ufs/ffs/fs.h projects/suj/7/sbin/fsck_ffs/fsck.h projects/suj/7/sbin/fsck_ffs/main.c projects/suj/7/sbin/fsck_ffs/suj.c projects/suj/7/sbin/fsdb/fsdb.c projects/suj/7/sys/ufs/ffs/ffs_inode.c projects/suj/7/sys/ufs/ffs/ffs_softdep.c projects/suj/7/sys/ufs/ffs/ffs_vfsops.c projects/suj/7/sys/ufs/ffs/fs.h projects/suj/8/sbin/fsck_ffs/fsck.h projects/suj/8/sbin/fsck_ffs/main.c projects/suj/8/sbin/fsck_ffs/suj.c projects/suj/8/sbin/fsdb/fsdb.c projects/suj/8/sys/ufs/ffs/ffs_inode.c projects/suj/8/sys/ufs/ffs/ffs_softdep.c projects/suj/8/sys/ufs/ffs/ffs_vfsops.c projects/suj/8/sys/ufs/ffs/fs.h Modified: projects/suj/6/sbin/fsck_ffs/fsck.h ============================================================================== --- projects/suj/6/sbin/fsck_ffs/fsck.h Mon Jan 25 23:27:21 2010 (r202990) +++ projects/suj/6/sbin/fsck_ffs/fsck.h Mon Jan 25 23:30:53 2010 (r202991) @@ -385,4 +385,4 @@ void rwerror(const char *mesg, ufs2_dad void sblock_init(void); void setinodebuf(ino_t); int setup(char *dev); -void suj_check(const char *filesys); +int suj_check(const char *filesys); Modified: projects/suj/6/sbin/fsck_ffs/main.c ============================================================================== --- projects/suj/6/sbin/fsck_ffs/main.c Mon Jan 25 23:27:21 2010 (r202990) +++ projects/suj/6/sbin/fsck_ffs/main.c Mon Jan 25 23:30:53 2010 (r202991) @@ -229,8 +229,9 @@ checkfilesys(char *filesys) if ((fsreadfd = open(filesys, O_RDONLY)) < 0 || readsb(0) == 0) exit(3); /* Cannot read superblock */ close(fsreadfd); - if (sblock.fs_flags & FS_NEEDSFSCK) - exit(4); /* Earlier 
background failed */ + /* Earlier background failed or journaled */ + if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ)) + exit(4); if ((sblock.fs_flags & FS_DOSOFTDEP) == 0) exit(5); /* Not running soft updates */ size = MIBSIZE; @@ -360,6 +361,23 @@ checkfilesys(char *filesys) sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } + /* + * Determine if we can and should do journal recovery. + */ + if ((sblock.fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == FS_SUJ) { + if (preen || reply("USE JOURNAL?")) { + if (suj_check(filesys) == 0) + goto out; + /* suj_check failed, fall through. */ + } + printf("** Skipping journal, falling through to full fsck\n"); + /* + * Write the superblock so we don't try to recover the + * journal on another pass. + */ + sblock.fs_mtime = time(NULL); + sbdirty(); + } /* * Cleared if any questions answered no. Used to decide if @@ -454,7 +472,6 @@ checkfilesys(char *filesys) inocleanup(); if (fsmodified) { sblock.fs_time = time(NULL); - sblock.fs_mtime = time(NULL); sbdirty(); } if (cvtlevel && sblk.b_dirty) { @@ -485,6 +502,7 @@ checkfilesys(char *filesys) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (rerun) printf("\n***** PLEASE RERUN FSCK *****\n"); +out: if (mntp != NULL) { /* * We modified a mounted file system. 
Do a mount update on Modified: projects/suj/6/sbin/fsck_ffs/suj.c ============================================================================== --- projects/suj/6/sbin/fsck_ffs/suj.c Mon Jan 25 23:27:21 2010 (r202990) +++ projects/suj/6/sbin/fsck_ffs/suj.c Mon Jan 25 23:30:53 2010 (r202991) @@ -49,7 +49,8 @@ __FBSDID("$FreeBSD$"); static void ino_decr(ino_t); -#define SUJ_HASHSIZE 128 +#define DOTDOT_OFFSET DIRECTSIZ(1) +#define SUJ_HASHSIZE 2048 #define SUJ_HASHMASK (SUJ_HASHSIZE - 1) #define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK) @@ -68,7 +69,9 @@ TAILQ_HEAD(srechd, suj_rec); struct suj_ino { LIST_ENTRY(suj_ino) si_next; struct srechd si_recs; + struct srechd si_newrecs; struct srechd si_movs; + struct jtrncrec *si_trunc; ino_t si_ino; int si_nlinkadj; int si_skipparent; @@ -90,6 +93,7 @@ struct data_blk { uint8_t *db_buf; ufs2_daddr_t db_blk; int db_size; + int db_dirty; }; struct ino_blk { @@ -106,6 +110,8 @@ struct suj_cg { struct inohd sc_inohash[SUJ_HASHSIZE]; struct iblkhd sc_iblkhash[SUJ_HASHSIZE]; struct ino_blk *sc_lastiblk; + struct suj_ino *sc_lastino; + struct suj_blk *sc_lastblk; uint8_t *sc_cgbuf; struct cg *sc_cgp; int sc_dirty; @@ -114,6 +120,8 @@ struct suj_cg { LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE]; LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE]; +struct suj_cg *lastcg; +struct data_blk *lastblk; TAILQ_HEAD(seghd, suj_seg) allsegs; uint64_t oldseq; @@ -131,6 +139,8 @@ uint64_t jbytes; uint64_t jrecs; typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); +static void ino_trunc(ino_t ino, off_t size); +static void ino_build(struct suj_ino *sino); static void * errmalloc(size_t n) @@ -159,12 +169,6 @@ opendisk(const char *devnam) disk->d_error); } fs = &disk->d_fs; - /* - * Setup a few things so reply() can work. 
- */ - bcopy(fs, &sblock, sizeof(sblock)); - fsreadfd = disk->d_fd; - fswritefd = disk->d_fd; } /* @@ -198,8 +202,6 @@ closedisk(const char *devnam) free(disk); disk = NULL; fs = NULL; - fsreadfd = -1; - fswritefd = -1; } /* @@ -216,10 +218,14 @@ cg_lookup(int cgx) abort(); errx(1, "Bad cg number %d", cgx); } + if (lastcg && lastcg->sc_cgx == cgx) + return (lastcg); hd = &cghash[SUJ_HASH(cgx)]; LIST_FOREACH(sc, hd, sc_next) - if (sc->sc_cgx == cgx) + if (sc->sc_cgx == cgx) { + lastcg = sc; return (sc); + } sc = errmalloc(sizeof(*sc)); bzero(sc, sizeof(*sc)); sc->sc_cgbuf = errmalloc(fs->fs_bsize); @@ -245,6 +251,8 @@ ino_lookup(ino_t ino, int creat) struct suj_cg *sc; sc = cg_lookup(ino_to_cg(fs, ino)); + if (sc->sc_lastino && sc->sc_lastino->si_ino == ino) + return (sc->sc_lastino); hd = &sc->sc_inohash[SUJ_HASH(ino)]; LIST_FOREACH(sino, hd, si_next) if (sino->si_ino == ino) @@ -256,6 +264,7 @@ ino_lookup(ino_t ino, int creat) sino->si_ino = ino; sino->si_nlinkadj = 0; TAILQ_INIT(&sino->si_recs); + TAILQ_INIT(&sino->si_newrecs); TAILQ_INIT(&sino->si_movs); LIST_INSERT_HEAD(hd, sino, si_next); @@ -274,7 +283,9 @@ blk_lookup(ufs2_daddr_t blk, int creat) struct blkhd *hd; sc = cg_lookup(dtog(fs, blk)); - hd = &sc->sc_blkhash[SUJ_HASH(blk)]; + if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk) + return (sc->sc_lastblk); + hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))]; LIST_FOREACH(sblk, hd, sb_next) if (sblk->sb_blk == blk) return (sblk); @@ -289,16 +300,18 @@ blk_lookup(ufs2_daddr_t blk, int creat) return (sblk); } -static uint8_t * -dblk_read(ufs2_daddr_t blk, int size) +static struct data_blk * +dblk_lookup(ufs2_daddr_t blk) { struct data_blk *dblk; struct dblkhd *hd; - hd = &dbhash[SUJ_HASH(blk)]; + hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))]; + if (lastblk && lastblk->db_blk == blk) + return (lastblk); LIST_FOREACH(dblk, hd, db_next) if (dblk->db_blk == blk) - goto found; + return (dblk); /* * The inode block wasn't located, allocate a new one. 
*/ @@ -306,7 +319,15 @@ dblk_read(ufs2_daddr_t blk, int size) bzero(dblk, sizeof(*dblk)); LIST_INSERT_HEAD(hd, dblk, db_next); dblk->db_blk = blk; -found: + return (dblk); +} + +static uint8_t * +dblk_read(ufs2_daddr_t blk, int size) +{ + struct data_blk *dblk; + + dblk = dblk_lookup(blk); /* * I doubt size mismatches can happen in practice but it is trivial * to handle. @@ -322,6 +343,33 @@ found: return (dblk->db_buf); } +static void +dblk_dirty(ufs2_daddr_t blk) +{ + struct data_blk *dblk; + + dblk = dblk_lookup(blk); + dblk->db_dirty = 1; +} + +static void +dblk_write(void) +{ + struct data_blk *dblk; + int i; + + for (i = 0; i < SUJ_HASHSIZE; i++) { + LIST_FOREACH(dblk, &dbhash[i], db_next) { + if (dblk->db_dirty == 0 || dblk->db_size == 0) + continue; + if (bwrite(disk, fsbtodb(fs, dblk->db_blk), + dblk->db_buf, dblk->db_size) == -1) + err(1, "Unable to write block %jd", + dblk->db_blk); + } + } +} + static union dinode * ino_read(ino_t ino) { @@ -333,7 +381,10 @@ ino_read(ino_t ino) blk = ino_to_fsba(fs, ino); sc = cg_lookup(ino_to_cg(fs, ino)); - hd = &sc->sc_iblkhash[SUJ_HASH(blk)]; + iblk = sc->sc_lastiblk; + if (iblk && iblk->ib_blk == blk) + goto found; + hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; LIST_FOREACH(iblk, hd, ib_next) if (iblk->ib_blk == blk) goto found; @@ -371,7 +422,7 @@ ino_dirty(ino_t ino) iblk->ib_dirty = 1; return; } - hd = &sc->sc_iblkhash[SUJ_HASH(blk)]; + hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))]; LIST_FOREACH(iblk, hd, ib_next) { if (iblk->ib_blk == blk) { iblk->ib_dirty = 1; @@ -612,22 +663,22 @@ blk_free(ufs2_daddr_t bno, int mask, int * to fetch a specific block. 
*/ static ufs2_daddr_t -indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn, int level) +indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn) { ufs2_daddr_t *bap2; ufs2_daddr_t *bap1; ufs_lbn_t lbnadd; ufs_lbn_t base; + int level; int i; if (blk == 0) return (0); - if (cur == lbn) - return (blk); - if (level == 0 && lbn < 0) { - abort(); + level = lbn_level(cur); + if (level == -1) + errx(1, "Invalid indir lbn %jd", lbn); + if (level == 0 && lbn < 0) errx(1, "Invalid lbn %jd", lbn); - } bap2 = (void *)dblk_read(blk, fs->fs_bsize); bap1 = (void *)bap2; lbnadd = 1; @@ -638,11 +689,9 @@ indir_blkatoff(ufs2_daddr_t blk, ino_t i i = (lbn - base) / lbnadd; else i = (-lbn - base) / lbnadd; - if (i < 0 || i >= NINDIR(fs)) { - abort(); + if (i < 0 || i >= NINDIR(fs)) errx(1, "Invalid indirect index %d produced by lbn %jd", i, lbn); - } if (level == 0) cur = base + (i * lbnadd); else @@ -657,7 +706,7 @@ indir_blkatoff(ufs2_daddr_t blk, ino_t i abort(); errx(1, "Invalid lbn %jd at level 0", lbn); } - return indir_blkatoff(blk, ino, cur, lbn, level - 1); + return indir_blkatoff(blk, ino, cur, lbn); } /* @@ -685,14 +734,10 @@ ino_blkatoff(union dinode *ip, ino_t ino return (ip->dp2.di_extb[lbn]); } /* - * And now direct and indirect. Verify that the lbn does not - * exceed the size required to store the file by asking for - * the lbn of the last byte. These blocks should be 0 anyway - * so this simply saves the traversal. + * Now direct and indirect. 
*/ - if (lbn > 0 && lbn > lblkno(fs, DIP(ip, di_size) - 1)) - return (0); - if (lbn < 0 && -lbn > lblkno(fs, DIP(ip, di_size) - 1)) + if (DIP(ip, di_mode) == IFLNK && + DIP(ip, di_size) < fs->fs_maxsymlinklen) return (0); if (lbn >= 0 && lbn < NDADDR) { *frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn)); @@ -703,7 +748,7 @@ ino_blkatoff(union dinode *ip, ino_t ino for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++, tmpval *= NINDIR(fs), cur = next) { next = cur + tmpval; - if (lbn == -cur) + if (lbn == -cur - i) return (DIP(ip, di_ib[i])); /* * Determine whether the lbn in question is within this tree. @@ -712,8 +757,7 @@ ino_blkatoff(union dinode *ip, ino_t ino continue; if (lbn > 0 && lbn >= next) continue; - - return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn, i); + return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn); } errx(1, "lbn %jd not in ino", lbn); } @@ -760,7 +804,10 @@ ino_isat(ino_t parent, off_t diroff, ino *mode = DIP(dip, di_mode); if ((*mode & IFMT) != IFDIR) { if (debug) { - /* This can happen if the parent inode was reallocated. */ + /* + * This can happen if the parent inode + * was reallocated. + */ if (*mode != 0) printf("Directory %d has bad mode %o\n", parent, *mode); @@ -791,7 +838,7 @@ ino_isat(ino_t parent, off_t diroff, ino * certain we hit a valid record and not some junk in the middle * of a file name. Stop when we reach or pass the expected offset. */ - dpoff = 0; + dpoff = (doff / DIRBLKSIZ) * DIRBLKSIZ; do { dp = (struct direct *)&block[dpoff]; if (dpoff == doff) @@ -801,7 +848,7 @@ ino_isat(ino_t parent, off_t diroff, ino dpoff += dp->d_reclen; } while (dpoff <= doff); if (dpoff > fs->fs_bsize) - errx(1, "Corrupt directory block in dir inode %d", parent); + errx(1, "Corrupt directory block in dir ino %d", parent); /* Not found. 
*/ if (dpoff != doff) { if (debug) @@ -830,6 +877,7 @@ ino_isat(ino_t parent, off_t diroff, ino #define VISIT_INDIR 0x0001 #define VISIT_EXT 0x0002 +#define VISIT_ROOT 0x0004 /* Operation came via root & valid pointers. */ /* * Read an indirect level which may or may not be linked into an inode. @@ -854,16 +902,14 @@ indir_visit(ino_t ino, ufs_lbn_t lbn, uf */ if (blk == 0) return; - if (blk_isindir(blk, ino, lbn) == 0) { - if (debug) - printf("blk %jd ino %d lbn %jd is not indir.\n", - blk, ino, lbn); - goto out; - } level = lbn_level(lbn); - if (level == -1) { - abort(); + if (level == -1) errx(1, "Invalid level for lbn %jd", lbn); + if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) { + if (debug) + printf("blk %jd ino %d lbn %jd(%d) is not indir.\n", + blk, ino, lbn, level); + goto out; } lbnadd = 1; for (i = level; i > 0; i--) @@ -903,6 +949,7 @@ out: static uint64_t ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags) { + ufs_lbn_t nextlbn; ufs_lbn_t tmpval; ufs_lbn_t lbn; uint64_t size; @@ -937,8 +984,15 @@ ino_visit(union dinode *ip, ino_t ino, i fragcnt += frags; visitor(ino, i, DIP(ip, di_db[i]), frags); } + /* + * We know the following indirects are real as we're following + * real pointers to them. + */ + flags |= VISIT_ROOT; for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, - tmpval *= NINDIR(fs), lbn += tmpval) { + lbn = nextlbn) { + nextlbn = lbn + tmpval; + tmpval *= NINDIR(fs); if (DIP(ip, di_ib[i]) == 0) continue; indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor, @@ -948,11 +1002,15 @@ ino_visit(union dinode *ip, ino_t ino, i } /* - * Null visitor function used when we just want to count blocks. + * Null visitor function used when we just want to count blocks and + * record the lbn. 
*/ +ufs_lbn_t visitlbn; static void null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { + if (lbn > 0) + visitlbn = lbn; } /* @@ -962,23 +1020,45 @@ null_visit(ino_t ino, ufs_lbn_t lbn, ufs * reachable at the time the inode was written. */ static void -ino_adjblks(ino_t ino) +ino_adjblks(struct suj_ino *sino) { - struct suj_ino *sino; union dinode *ip; uint64_t blocks; uint64_t frags; + off_t isize; + off_t size; + ino_t ino; - sino = ino_lookup(ino, 1); - if (sino->si_blkadj) - return; - sino->si_blkadj = 1; + ino = sino->si_ino; ip = ino_read(ino); /* No need to adjust zero'd inodes. */ if (DIP(ip, di_mode) == 0) return; + /* + * Visit all blocks and count them as well as recording the last + * valid lbn in the file. If the file size doesn't agree with the + * last lbn we need to truncate to fix it. Otherwise just adjust + * the blocks count. + */ + visitlbn = 0; frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); blocks = fsbtodb(fs, frags); + /* + * We assume the size and direct block list is kept coherent by + * softdep. For files that have extended into indirects we truncate + * to the size in the inode or the maximum size permitted by + * populated indirects. + */ + if (visitlbn >= NDADDR) { + isize = DIP(ip, di_size); + size = lblktosize(fs, visitlbn + 1); + printf("ino %d isize %jd size %jd\n", ino, isize, size); + if (isize > size) + isize = size; + /* Always truncate to free any unpopulated indirects. 
*/ + ino_trunc(sino->si_ino, isize); + return; + } if (blocks == DIP(ip, di_blocks)) return; if (debug) @@ -1021,6 +1101,16 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino } static void +ino_setskip(struct suj_ino *sino, ino_t parent) +{ + int isdot; + int mode; + + if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot)) + sino->si_skipparent = 1; +} + +static void ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { struct suj_ino *sino; @@ -1053,7 +1143,7 @@ ino_free_children(ino_t ino, ufs_lbn_t l if (isparent && skipparent == 1) continue; if (debug) - printf("Directory %d removing inode %d name %s\n", + printf("Directory %d removing ino %d name %s\n", ino, dp->d_ino, dp->d_name); /* * Lookup this inode to see if we have a record for it. @@ -1070,7 +1160,7 @@ ino_free_children(ino_t ino, ufs_lbn_t l * parent. Don't try to adjust our link down again. */ if (isparent == 0) - sino->si_skipparent = 1; + ino_setskip(sino, ino); /* * If we haven't yet processed this inode we need to make * sure we will successfully discover the lost path. If not @@ -1084,16 +1174,16 @@ ino_free_children(ino_t ino, ufs_lbn_t l break; } if (srec == NULL) - sino->si_nlinkadj--; + sino->si_nlinkadj++; } } /* - * Truncate an inode, freeing all blocks and decrementing all children's + * Reclaim an inode, freeing all blocks and decrementing all children's * link counts. Free the inode back to the cg. 
*/ static void -ino_truncate(union dinode *ip, ino_t ino, int mode) +ino_reclaim(union dinode *ip, ino_t ino, int mode) { uint32_t gen; @@ -1147,7 +1237,7 @@ ino_decr(ino_t ino) if (debug) printf("ino %d not enough links to live %d < %d\n", ino, nlink, reqlink); - ino_truncate(ip, ino, mode); + ino_reclaim(ip, ino, mode); return; } DIP_SET(ip, di_nlink, nlink); @@ -1192,7 +1282,7 @@ ino_adjust(ino_t ino, int lastmode, nlin if (debug) printf("ino %d not enough links to live %d < %d\n", ino, nlink, reqlink); - ino_truncate(ip, ino, mode); + ino_reclaim(ip, ino, mode); return; } /* If required write the updated link count. */ @@ -1205,13 +1295,194 @@ ino_adjust(ino_t ino, int lastmode, nlin ino_dirty(ino); } -#define DOTDOT_OFFSET DIRECTSIZ(1) +/* + * Truncate some or all blocks in an indirect, freeing any that are required + * and zeroing the indirect. + */ +static void +indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn) +{ + ufs2_daddr_t *bap2; + ufs1_daddr_t *bap1; + ufs_lbn_t lbnadd; + ufs2_daddr_t nblk; + ufs_lbn_t next; + ufs_lbn_t nlbn; + int dirty; + int level; + int i; + + if (blk == 0) + return; + dirty = 0; + level = lbn_level(lbn); + if (level == -1) + errx(1, "Invalid level for lbn %jd", lbn); + lbnadd = 1; + for (i = level; i > 0; i--) + lbnadd *= NINDIR(fs); + bap1 = (void *)dblk_read(blk, fs->fs_bsize); + bap2 = (void *)bap1; + for (i = 0; i < NINDIR(fs); i++) { + if (fs->fs_magic == FS_UFS1_MAGIC) + nblk = *bap1++; + else + nblk = *bap2++; + if (nblk == 0) + continue; + if (level != 0) { + nlbn = (lbn + 1) - (i * lbnadd); + /* + * Calculate the lbn of the next indirect to + * determine if any of this indirect must be + * reclaimed. + */ + next = -(lbn + level) + ((i+1) * lbnadd); + if (next <= lastlbn) + continue; + indir_trunc(ino, nlbn, nblk, lastlbn); + /* If all of this indirect was reclaimed, free it. 
*/ + nlbn = next - lbnadd; + if (nlbn < lastlbn) + continue; + } else { + nlbn = -lbn + i * lbnadd; + if (nlbn < lastlbn) + continue; + } + dirty = 1; + blk_free(nblk, 0, fs->fs_frag); + if (fs->fs_magic == FS_UFS1_MAGIC) + *(bap1 - 1) = 0; + else + *(bap2 - 1) = 0; + } + if (dirty) + dblk_dirty(blk); +} + +/* + * Truncate an inode to the minimum of the given size or the last populated + * block after any over size have been discarded. The kernel would allocate + * the last block in the file but fsck does not and neither do we. This + * code never extends files, only shrinks them. + */ +static void +ino_trunc(ino_t ino, off_t size) +{ + union dinode *ip; + ufs2_daddr_t bn; + uint64_t totalfrags; + ufs_lbn_t nextlbn; + ufs_lbn_t lastlbn; + ufs_lbn_t tmpval; + ufs_lbn_t lbn; + ufs_lbn_t i; + int frags; + off_t cursize; + off_t off; + int mode; + + ip = ino_read(ino); + mode = DIP(ip, di_mode) & IFMT; + cursize = DIP(ip, di_size); + if (debug) + printf("Truncating ino %d, mode %o to size %jd from size %jd\n", + ino, mode, size, cursize); + + /* Skip datablocks for short links and devices. */ + if (mode == 0 || mode == IFBLK || mode == IFCHR || + (mode == IFLNK && cursize < fs->fs_maxsymlinklen)) + return; + /* Don't extend. */ + if (size > cursize) + size = cursize; + lastlbn = lblkno(fs, blkroundup(fs, size)); + for (i = lastlbn; i < NDADDR; i++) { + if (DIP(ip, di_db[i]) == 0) + continue; + frags = sblksize(fs, cursize, i); + frags = numfrags(fs, frags); + blk_free(DIP(ip, di_db[i]), 0, frags); + DIP_SET(ip, di_db[i], 0); + } + /* + * Follow indirect blocks, freeing anything required. + */ + for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, + lbn = nextlbn) { + nextlbn = lbn + tmpval; + tmpval *= NINDIR(fs); + /* If we're not freeing any in this indirect range skip it. 
*/ + if (lastlbn >= nextlbn) + continue; + if (DIP(ip, di_ib[i]) == 0) + continue; + indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn); + /* If we freed everything in this indirect free the indir. */ + if (lastlbn > lbn) + continue; + blk_free(DIP(ip, di_ib[i]), 0, frags); + DIP_SET(ip, di_ib[i], 0); + } + ino_dirty(ino); + /* + * Now that we've freed any whole blocks that exceed the desired + * truncation size, figure out how many blocks remain and what the + * last populated lbn is. We will set the size to this last lbn + * rather than worrying about allocating the final lbn as the kernel + * would've done. This is consistent with normal fsck behavior. + */ + visitlbn = 0; + totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); + if (size > lblktosize(fs, visitlbn + 1)) + size = lblktosize(fs, visitlbn + 1); + /* + * If we're truncating direct blocks we have to adjust frags + * accordingly. + */ + if (visitlbn < NDADDR) { + long oldspace, newspace; + + bn = DIP(ip, di_db[visitlbn]); + oldspace = sblksize(fs, cursize, visitlbn); + newspace = sblksize(fs, size, visitlbn); + if (oldspace != newspace) { + bn += numfrags(fs, newspace); + frags = numfrags(fs, oldspace - newspace); + blk_free(bn, 0, frags); + totalfrags -= frags; + } + } + DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags)); + DIP_SET(ip, di_size, size); + /* + * If we've truncated into the middle of a block or frag we have + * to zero it here. Otherwise the file could extend into + * uninitialized space later. + */ + off = blkoff(fs, size); + if (off) { + uint8_t *buf; + long clrsize; + + bn = ino_blkatoff(ip, ino, visitlbn, &frags); + if (bn == 0) + errx(1, "Block missing from ino %d at lbn %jd\n", + ino, visitlbn); + clrsize = frags * fs->fs_fsize; + buf = dblk_read(bn, clrsize); + clrsize -= off; + buf += off; + bzero(buf, clrsize); + dblk_dirty(bn); + } + return; +} /* * Process records available for one inode and determine whether the * link count is correct or needs adjusting. 
- * - * XXX Failed to fix zero length directory. Shouldn't .. have been mising? */ static void ino_check(struct suj_ino *sino) @@ -1228,6 +1499,15 @@ ino_check(struct suj_ino *sino) int isat; int mode; + /* + * Handle truncations that were not complete. We don't have + * to worry about truncating directory entries as they must have + * been removed for truncate to succeed. + */ + if (sino->si_trunc) { + ino_trunc(ino, sino->si_trunc->jt_size); + sino->si_trunc = NULL; + } if (sino->si_hasrecs == 0) return; ino = sino->si_ino; @@ -1239,9 +1519,9 @@ ino_check(struct suj_ino *sino) return; rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; nlink = rrec->jr_nlink; - newlinks = sino->si_nlinkadj; + newlinks = 0; dotlinks = 0; - removes = 0; + removes = sino->si_nlinkadj; TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, @@ -1286,7 +1566,7 @@ ino_check(struct suj_ino *sino) if (rrec->jr_diroff == DOTDOT_OFFSET) { stmp = ino_lookup(rrec->jr_parent, 0); if (stmp) - stmp->si_skipparent = 1; + ino_setskip(stmp, ino); } } } @@ -1304,6 +1584,7 @@ blk_check(struct suj_blk *sblk) { struct suj_rec *srec; struct jblkrec *brec; + struct suj_ino *sino; ufs2_daddr_t blk; int mask; int frags; @@ -1318,6 +1599,10 @@ blk_check(struct suj_blk *sblk) frags = brec->jb_frags; blk = brec->jb_blkno + brec->jb_oldfrags; isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); + if (sino == NULL || sino->si_ino != brec->jb_ino) { + sino = ino_lookup(brec->jb_ino, 1); + sino->si_blkadj = 1; + } if (debug) printf("op %d blk %jd ino %d lbn %jd frags %d isat %d (%d)\n", brec->jb_op, blk, brec->jb_ino, brec->jb_lbn, @@ -1336,7 +1621,6 @@ blk_check(struct suj_blk *sblk) blk += frags; frags = brec->jb_frags - frags; blk_free(blk, mask, frags); - ino_adjblks(brec->jb_ino); continue; } /* @@ -1349,19 +1633,31 @@ blk_check(struct suj_blk *sblk) */ blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, 
brec->jb_op == JOP_FREEBLK); - ino_adjblks(brec->jb_ino); } } /* + * Walk the list of inode records for this cg and resolve moved and duplicate + * inode references now that we have a complete picture. + */ +static void +cg_build(struct suj_cg *sc) +{ + struct suj_ino *sino; + int i; + + for (i = 0; i < SUJ_HASHSIZE; i++) + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + ino_build(sino); +} + +/* * Walk the list of inode and block records for this cg, recovering any * changes which were not complete at the time of crash. */ static void cg_check(struct suj_cg *sc) { - struct suj_blk *nextb; - struct suj_ino *nexti; struct suj_ino *sino; struct suj_blk *sblk; int i; @@ -1370,32 +1666,43 @@ cg_check(struct suj_cg *sc) printf("Recovering cg %d\n", sc->sc_cgx); for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH_SAFE(sino, &sc->sc_inohash[i], si_next, nexti) + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) ino_check(sino); for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH_SAFE(sblk, &sc->sc_blkhash[i], sb_next, nextb) + LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); } /* - * Write a potentially dirty cg. All inodes must be written before the - * cg maps are so that an allocated inode is never marked free, even if - * we crash during fsck. + * Now that we've freed blocks which are not referenced we make a second + * pass over all inodes to adjust their block counts. + */ +static void +cg_check2(struct suj_cg *sc) +{ + struct suj_ino *sino; + int i; + + for (i = 0; i < SUJ_HASHSIZE; i++) + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + if (sino->si_blkadj) + ino_adjblks(sino); +} + +/* + * Write a potentially dirty cg. Recalculate the summary information and + * update the superblock summary. 
*/ static void cg_write(struct suj_cg *sc) { - struct ino_blk *iblk; ufs1_daddr_t fragno, cgbno, maxbno; u_int8_t *blksfree; struct cg *cgp; int blk; int i; - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) - iblk_write(iblk); if (sc->sc_dirty == 0) return; /* @@ -1437,6 +1744,21 @@ cg_write(struct suj_cg *sc) err(1, "Unable to write cylinder group %d", sc->sc_cgx); } +/* + * Write out any modified inodes. + */ +static void +cg_write_inos(struct suj_cg *sc) +{ + struct ino_blk *iblk; + int i; + + for (i = 0; i < SUJ_HASHSIZE; i++) + LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) + if (iblk->ib_dirty) + iblk_write(iblk); +} + static void cg_apply(void (*apply)(struct suj_cg *)) { @@ -1473,7 +1795,7 @@ ino_unlinked(void) if (debug) printf("Freeing unlinked ino %d mode %o\n", ino, mode); - ino_truncate(ip, ino, mode); + ino_reclaim(ip, ino, mode); } else if (debug) printf("Skipping ino %d mode %o with link %d\n", ino, mode, DIP(ip, di_nlink)); @@ -1482,6 +1804,29 @@ ino_unlinked(void) } /* + * Append a new record to the list of records requiring processing. + */ +static void +ino_append(union jrec *rec) +{ + struct suj_ino *sino; + struct suj_rec *srec; + + /* + * Lookup the ino and clear truncate if one is found. Partial + * truncates are always done synchronously so if we discover + * an operation that requires a lock the truncation has completed + * and can be discarded. + */ + sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); + sino->si_trunc = NULL; + sino->si_hasrecs = 1; + srec = errmalloc(sizeof(*srec)); + srec->sr_rec = rec; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201001252330.o0PNUrlq010722>
