Date: Tue, 26 Jan 2010 06:45:39 +0000 (UTC)
From: Jeff Roberson <jeff@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r203013 - in projects/suj: 6/lib/libufs 6/sbin/fsck_ffs 6/sbin/tunefs 6/sys/sys 6/sys/ufs/ffs 7/lib/libufs 7/sbin/fsck_ffs 7/sbin/mount 7/sbin/tunefs 7/sys/sys 7/sys/ufs/ffs 7/sys/ufs/u...
Message-ID: <201001260645.o0Q6jdqC007768@svn.freebsd.org>
index | next in thread | raw e-mail
Author: jeff Date: Tue Jan 26 06:45:38 2010 New Revision: 203013 URL: http://svn.freebsd.org/changeset/base/203013 Log: - Merge r203012 from suj/head Modified: projects/suj/6/lib/libufs/cgroup.c projects/suj/6/lib/libufs/libufs.h projects/suj/6/sbin/fsck_ffs/pass4.c projects/suj/6/sbin/fsck_ffs/suj.c projects/suj/6/sbin/tunefs/tunefs.c projects/suj/6/sys/sys/mount.h projects/suj/6/sys/ufs/ffs/ffs_alloc.c projects/suj/6/sys/ufs/ffs/ffs_softdep.c projects/suj/6/sys/ufs/ffs/ffs_vfsops.c projects/suj/6/sys/ufs/ffs/fs.h projects/suj/7/lib/libufs/cgroup.c projects/suj/7/lib/libufs/libufs.h projects/suj/7/sbin/fsck_ffs/pass4.c projects/suj/7/sbin/fsck_ffs/suj.c projects/suj/7/sbin/mount/mount.c projects/suj/7/sbin/tunefs/tunefs.c projects/suj/7/sys/sys/mount.h projects/suj/7/sys/ufs/ffs/ffs_alloc.c projects/suj/7/sys/ufs/ffs/ffs_softdep.c projects/suj/7/sys/ufs/ffs/ffs_vfsops.c projects/suj/7/sys/ufs/ffs/fs.h projects/suj/7/sys/ufs/ufs/inode.h projects/suj/8/lib/libufs/cgroup.c projects/suj/8/lib/libufs/libufs.h projects/suj/8/sbin/fsck_ffs/pass4.c projects/suj/8/sbin/fsck_ffs/suj.c projects/suj/8/sbin/mount/mount.c projects/suj/8/sbin/tunefs/tunefs.c projects/suj/8/sys/sys/mount.h projects/suj/8/sys/ufs/ffs/ffs_alloc.c projects/suj/8/sys/ufs/ffs/ffs_softdep.c projects/suj/8/sys/ufs/ffs/ffs_vfsops.c projects/suj/8/sys/ufs/ffs/fs.h projects/suj/8/sys/ufs/ufs/inode.h Modified: projects/suj/6/lib/libufs/cgroup.c ============================================================================== --- projects/suj/6/lib/libufs/cgroup.c Tue Jan 26 06:36:10 2010 (r203012) +++ projects/suj/6/lib/libufs/cgroup.c Tue Jan 26 06:45:38 2010 (r203013) @@ -71,6 +71,67 @@ gotit: return (cgbase(fs, cgp->cg_cgx) + blkstofrags(fs, bno)); } +int +cgbfree(struct uufsd *disk, ufs2_daddr_t bno, long size) +{ + u_int8_t *blksfree; + struct fs *fs; + struct cg *cgp; + ufs1_daddr_t fragno, cgbno; + int i, cg, blk, frags, bbase; + + fs = &disk->d_fs; + cg = dtog(fs, bno); + if (cgread1(disk, cg) != 1) + 
return (-1); + cgp = &disk->d_cg; + cgbno = dtogd(fs, bno); + blksfree = cg_blksfree(cgp); + if (size == fs->fs_bsize) { + fragno = fragstoblks(fs, cgbno); + ffs_setblock(fs, blksfree, fragno); + ffs_clusteracct(fs, cgp, fragno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + fs->fs_cs(fs, cg).cs_nbfree++; + } else { + bbase = cgbno - fragnum(fs, cgbno); + /* + * decrement the counts associated with the old frags + */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* + * deallocate the fragment + */ + frags = numfrags(fs, size); + for (i = 0; i < frags; i++) + setbit(blksfree, cgbno + i); + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cg).cs_nffree += i; + /* + * add back in counts associated with the new frags + */ + blk = blkmap(fs, blksfree, bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + /* + * if a complete block has been reassembled, account for it + */ + fragno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, blksfree, fragno)) { + cgp->cg_cs.cs_nffree -= fs->fs_frag; + fs->fs_cstotal.cs_nffree -= fs->fs_frag; + fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; + ffs_clusteracct(fs, cgp, fragno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + fs->fs_cs(fs, cg).cs_nbfree++; + } + } + return cgwrite(disk); +} + ino_t cgialloc(struct uufsd *disk) { Modified: projects/suj/6/lib/libufs/libufs.h ============================================================================== --- projects/suj/6/lib/libufs/libufs.h Tue Jan 26 06:36:10 2010 (r203012) +++ projects/suj/6/lib/libufs/libufs.h Tue Jan 26 06:45:38 2010 (r203013) @@ -110,6 +110,7 @@ ssize_t bwrite(struct uufsd *, ufs2_dadd * cgroup.c */ ufs2_daddr_t cgballoc(struct uufsd *); +int cgbfree(struct uufsd *, ufs2_daddr_t, long); ino_t cgialloc(struct uufsd *); int cgread(struct uufsd *); int cgread1(struct uufsd *, int); Modified: projects/suj/6/sbin/fsck_ffs/pass4.c 
============================================================================== --- projects/suj/6/sbin/fsck_ffs/pass4.c Tue Jan 26 06:36:10 2010 (r203012) +++ projects/suj/6/sbin/fsck_ffs/pass4.c Tue Jan 26 06:45:38 2010 (r203013) @@ -72,9 +72,6 @@ pass4(void) for (i = 0; i < inostathead[cg].il_numalloced; i++, inumber++) { if (inumber < ROOTINO) continue; - if (sblock.fs_flags & FS_SUJ && - inumber == sblock.fs_sujournal) - continue; idesc.id_number = inumber; switch (inoinfo(inumber)->ino_state) { Modified: projects/suj/6/sbin/fsck_ffs/suj.c ============================================================================== --- projects/suj/6/sbin/fsck_ffs/suj.c Tue Jan 26 06:36:10 2010 (r203012) +++ projects/suj/6/sbin/fsck_ffs/suj.c Tue Jan 26 06:45:38 2010 (r203013) @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); #include <stdlib.h> #include <stdint.h> #include <libufs.h> +#include <string.h> #include <strings.h> #include <err.h> #include <assert.h> @@ -63,6 +64,7 @@ struct suj_seg { struct suj_rec { TAILQ_ENTRY(suj_rec) sr_next; union jrec *sr_rec; + int sr_alt; /* Is alternate address? */ }; TAILQ_HEAD(srechd, suj_rec); @@ -127,6 +129,7 @@ TAILQ_HEAD(seghd, suj_seg) allsegs; uint64_t oldseq; static struct uufsd *disk = NULL; static struct fs *fs = NULL; +ino_t sujino; /* * Summary statistics. @@ -191,8 +194,7 @@ closedisk(const char *devnam) fs->fs_cstotal.cs_nifree += cgsum->cs_nifree; fs->fs_cstotal.cs_ndir += cgsum->cs_ndir; } - /* XXX Don't set clean for now, we don't trust the journal. 
*/ - /* fs->fs_clean = 1; */ + fs->fs_clean = 1; fs->fs_time = time(NULL); fs->fs_mtime = time(NULL); if (sbwrite(disk, 0) == -1) @@ -1823,6 +1825,7 @@ ino_append(union jrec *rec) sino->si_hasrecs = 1; srec = errmalloc(sizeof(*srec)); srec->sr_rec = rec; + srec->sr_alt = 0; TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next); } @@ -1844,9 +1847,10 @@ ino_build_ref(struct suj_ino *sino, stru refrec = (struct jrefrec *)srec->sr_rec; if (debug) - printf("ino_build: op %d, ino %d, nlink %d, parent %d, diroff %jd\n", - refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, - refrec->jr_diroff); + printf("ino_build: op %d, ino %d, nlink %d, " + "parent %d, diroff %jd\n", + refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, + refrec->jr_parent, refrec->jr_diroff); /* * Search for a mvrec that matches this offset. Whether it's an add @@ -1871,16 +1875,19 @@ ino_build_ref(struct suj_ino *sino, stru rrn = errmalloc(sizeof(*refrec)); *rrn = *refrec; rrn->jr_op = JOP_ADDREF; + rrn->jr_diroff = mvrec->jm_oldoff; srn = errmalloc(sizeof(*srec)); + srn->sr_alt = 1; srn->sr_rec = (union jrec *)rrn; ino_build_ref(sino, srn); - refrec->jr_diroff = mvrec->jm_oldoff; } } } /* * We walk backwards so that adds and removes are evaluated in the - * correct order. + * correct order. If a primary record conflicts with an alt keep + * the primary and discard the alt. We must track this to keep + * the correct number of removes in the list. */ for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; srn = TAILQ_PREV(srn, srechd, sr_next)) { @@ -1890,7 +1897,17 @@ ino_build_ref(struct suj_ino *sino, stru continue; if (debug) printf("Discarding dup.\n"); - rrn->jr_mode = refrec->jr_mode; + if (srn->sr_alt == 0) { + rrn->jr_mode = refrec->jr_mode; + return; + } + /* + * Replace the record in place with the old nlink in case + * we replace the head of the list. Abandon srec as a dup. 
+ */ + refrec->jr_nlink = rrn->jr_nlink; + srn->sr_rec = srec->sr_rec; + srn->sr_alt = srec->sr_alt; return; } TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); @@ -1930,9 +1947,12 @@ ino_move_ref(struct suj_ino *sino, struc /* * When an entry is moved we don't know whether the write * to move has completed yet. To resolve this we create - * a new add dependency in the new location as if it were added - * twice. Only one will succeed. + * a new add dependency in the new location as if it were + * added twice. Only one will succeed. Consider the + * new offset the primary location for the inode and the + * old offset the alt. */ + srn->sr_alt = 1; refrec = errmalloc(sizeof(*refrec)); refrec->jr_op = JOP_ADDREF; refrec->jr_ino = mvrec->jm_ino; @@ -1941,12 +1961,14 @@ ino_move_ref(struct suj_ino *sino, struc refrec->jr_mode = rrn->jr_mode; refrec->jr_nlink = rrn->jr_nlink; srn = errmalloc(sizeof(*srn)); + srn->sr_alt = 0; srn->sr_rec = (union jrec *)refrec; ino_build_ref(sino, srn); break; } /* - * Add this mvrec to the queue of pending mvs. + * Add this mvrec to the queue of pending mvs, possibly collapsing + * it with a prior move for the same inode and offset. 
*/ for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = TAILQ_PREV(srn, srechd, sr_next)) { @@ -2195,19 +2217,25 @@ suj_verifyino(union dinode *ip) if (DIP(ip, di_nlink) != 1) { printf("Invalid link count %d for journal inode %d\n", - DIP(ip, di_nlink), fs->fs_sujournal); + DIP(ip, di_nlink), sujino); + return (-1); + } + + if (DIP(ip, di_flags) != (SF_IMMUTABLE | SF_NOUNLINK)) { + printf("Invalid flags 0x%X for journal inode %d\n", + DIP(ip, di_flags), sujino); return (-1); } - if (DIP(ip, di_mode) != IFREG) { - printf("Invalid mode %d for journal inode %d\n", - DIP(ip, di_mode), fs->fs_sujournal); + if (DIP(ip, di_mode) != (IFREG | IREAD)) { + printf("Invalid mode %o for journal inode %d\n", + DIP(ip, di_mode), sujino); return (-1); } if (DIP(ip, di_size) < SUJ_MIN || DIP(ip, di_size) > SUJ_MAX) { printf("Invalid size %jd for journal inode %d\n", - DIP(ip, di_size), fs->fs_sujournal); + DIP(ip, di_size), sujino); return (-1); } @@ -2447,20 +2475,60 @@ restart: } /* + * Search a directory block for the SUJ_FILE. + */ +static void +suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) +{ + char block[MAXBSIZE]; + struct direct *dp; + int bytes; + int off; + + if (sujino) + return; + bytes = lfragtosize(fs, frags); + if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0) + err(1, "Failed to read ROOTINO directory block %jd", blk); + for (off = 0; off < bytes; off += dp->d_reclen) { + dp = (struct direct *)&block[off]; + if (dp->d_reclen == 0) + break; + if (dp->d_ino == 0) + continue; + if (dp->d_namlen != strlen(SUJ_FILE)) + continue; + if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0) + continue; + sujino = dp->d_ino; + return; + } +} + +/* * Orchestrate the verification of a filesystem via the softupdates journal. */ int suj_check(const char *filesys) { union dinode *jip; + union dinode *ip; uint64_t blocks; opendisk(filesys); TAILQ_INIT(&allsegs); /* + * Find the journal inode. 
+ */ + ip = ino_read(ROOTINO); + sujino = 0; + ino_visit(ip, ROOTINO, suj_find, 0); + if (sujino == 0) + errx(1, "Journal inode removed. Use tunefs to re-create."); + /* * Fetch the journal inode and verify it. */ - jip = ino_read(fs->fs_sujournal); + jip = ino_read(sujino); printf("** SU+J Recovering %s\n", filesys); if (suj_verifyino(jip) != 0) return (-1); @@ -2469,11 +2537,11 @@ suj_check(const char *filesys) * available journal blocks in with suj_read(). */ printf("** Reading %jd byte journal from inode %d.\n", - DIP(jip, di_size), fs->fs_sujournal); + DIP(jip, di_size), sujino); suj_jblocks = jblocks_create(); - blocks = ino_visit(jip, fs->fs_sujournal, suj_add_block, 0); + blocks = ino_visit(jip, sujino, suj_add_block, 0); if (blocks != numfrags(fs, DIP(jip, di_size))) - errx(1, "Sparse journal inode %d.\n", fs->fs_sujournal); + errx(1, "Sparse journal inode %d.\n", sujino); suj_read(); jblocks_destroy(suj_jblocks); suj_jblocks = NULL; Modified: projects/suj/6/sbin/tunefs/tunefs.c ============================================================================== --- projects/suj/6/sbin/tunefs/tunefs.c Tue Jan 26 06:36:10 2010 (r203012) +++ projects/suj/6/sbin/tunefs/tunefs.c Tue Jan 26 06:45:38 2010 (r203013) @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <ufs/ufs/ufsmount.h> #include <ufs/ufs/dinode.h> #include <ufs/ffs/fs.h> +#include <ufs/ufs/dir.h> #include <ctype.h> #include <err.h> @@ -74,6 +75,7 @@ struct uufsd disk; void usage(void); void printfs(void); int journal_alloc(int64_t size); +void journal_clear(void); void sbdirty(void); int @@ -327,11 +329,11 @@ main(int argc, char *argv[]) if ((~sblock.fs_flags & FS_SUJ) == FS_SUJ) { warnx("%s remains unchanged as disabled", name); } else { - sbdirty(); + journal_clear(); sblock.fs_flags &= ~(FS_DOSOFTDEP | FS_SUJ); - sblock.fs_sujournal = 0; sblock.fs_sujfree = 0; - warnx("%s cleared", name); + warnx("%s cleared, " + "remove .sujournal to reclaim space", name); } } } @@ -452,11 +454,9 @@ 
journal_balloc(void) { ufs2_daddr_t blk; struct cg *cgp; - struct fs *fs; int valid; cgp = &disk.d_cg; - fs = &disk.d_fs; for (;;) { blk = cgballoc(&disk); if (blk > 0) @@ -482,13 +482,231 @@ journal_balloc(void) warnx("Failed to find sufficient free blocks for the journal"); return -1; } - if (bwrite(&disk, fsbtodb(fs, blk), clrbuf, fs->fs_bsize) <= 0) { + if (bwrite(&disk, fsbtodb(&sblock, blk), clrbuf, + sblock.fs_bsize) <= 0) { warn("Failed to initialize new block"); return -1; } return (blk); } +/* + * Search a directory block for the SUJ_FILE. + */ +static ino_t +dir_search(ufs2_daddr_t blk, int bytes) +{ + char block[MAXBSIZE]; + struct direct *dp; + int off; + + if (bread(&disk, fsbtodb(&sblock, blk), block, bytes) <= 0) { + warn("Failed to read dir block"); + return (-1); + } + for (off = 0; off < bytes; off += dp->d_reclen) { + dp = (struct direct *)&block[off]; + if (dp->d_reclen == 0) + break; + if (dp->d_ino == 0) + continue; + if (dp->d_namlen != strlen(SUJ_FILE)) + continue; + if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0) + continue; + return (dp->d_ino); + } + + return (0); +} + +/* + * Search in the ROOTINO for the SUJ_FILE. If it exists we can not enable + * journaling. 
+ */ +static ino_t +journal_findfile(void) +{ + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; + int mode; + void *ip; + int i; + + if (getino(&disk, &ip, ROOTINO, &mode) != 0) { + warn("Failed to get root inode"); + return (-1); + } + dp2 = ip; + dp1 = ip; + if (sblock.fs_magic == FS_UFS1_MAGIC) { + if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) { + warnx("ROOTINO extends beyond direct blocks."); + return (-1); + } + for (i = 0; i < NDADDR; i++) { + if (dp1->di_db[i] == 0) + break; + if (dir_search(dp1->di_db[i], + sblksize(&sblock, (off_t)dp1->di_size, i)) != 0) + return (-1); + } + } else { + if ((off_t)dp1->di_size >= lblktosize(&sblock, NDADDR)) { + warnx("ROOTINO extends beyond direct blocks."); + return (-1); + } + for (i = 0; i < NDADDR; i++) { + if (dp2->di_db[i] == 0) + break; + if (dir_search(dp2->di_db[i], + sblksize(&sblock, (off_t)dp2->di_size, i)) != 0) + return (-1); + } + } + + return (0); +} + +/* + * Insert the journal at inode 'ino' into directory blk 'blk' at the first + * free offset of 'off'. DIRBLKSIZ blocks after off are initialized as + * empty. 
+ */ +static int +dir_insert(ufs2_daddr_t blk, off_t off, ino_t ino) +{ + struct direct *dp; + char block[MAXBSIZE]; + + if (bread(&disk, fsbtodb(&sblock, blk), block, sblock.fs_bsize) <= 0) { + warn("Failed to read dir block"); + return (-1); + } + bzero(&block[off], sblock.fs_bsize - off); + dp = (struct direct *)&block[off]; + dp->d_ino = ino; + dp->d_reclen = DIRBLKSIZ; + dp->d_type = DT_REG; + dp->d_namlen = strlen(SUJ_FILE); + bcopy(SUJ_FILE, &dp->d_name, strlen(SUJ_FILE)); + off += DIRBLKSIZ; + for (; off < sblock.fs_bsize; off += DIRBLKSIZ) { + dp = (struct direct *)&block[off]; + dp->d_ino = 0; + dp->d_reclen = DIRBLKSIZ; + dp->d_type = DT_UNKNOWN; + } + if (bwrite(&disk, fsbtodb(&sblock, blk), block, sblock.fs_bsize) <= 0) { + warn("Failed to write dir block"); + return (-1); + } + return (0); +} + +/* + * Extend a directory block in 'blk' by copying it to a full size block + * and inserting the new journal inode into .sujournal. + */ +static int +dir_extend(ufs2_daddr_t blk, ufs2_daddr_t nblk, off_t size, ino_t ino) +{ + char block[MAXBSIZE]; + + if (bread(&disk, fsbtodb(&sblock, blk), block, size) <= 0) { + warn("Failed to read dir block"); + return (-1); + } + if (bwrite(&disk, fsbtodb(&sblock, nblk), block, size) <= 0) { + warn("Failed to write dir block"); + return (-1); + } + + return dir_insert(nblk, size, ino); +} + +/* + * Insert the journal file into the ROOTINO directory. 
We always extend the + * last frag + */ +static int +journal_insertfile(ino_t ino) +{ + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; + void *ip; + ufs2_daddr_t nblk; + ufs2_daddr_t blk; + ufs_lbn_t lbn; + int size; + int mode; + int off; + + if (getino(&disk, &ip, ROOTINO, &mode) != 0) { + warn("Failed to get root inode"); + sbdirty(); + return (-1); + } + dp2 = ip; + dp1 = ip; + blk = 0; + size = 0; + nblk = journal_balloc(); + if (nblk <= 0) + return (-1); + /* + * For simplicity sake we aways extend the ROOTINO into a new + * directory block rather than searching for space and inserting + * into an existing block. However, if the rootino has frags + * have to free them and extend the block. + */ + if (sblock.fs_magic == FS_UFS1_MAGIC) { + lbn = lblkno(&sblock, dp1->di_size); + off = blkoff(&sblock, dp1->di_size); + blk = dp1->di_db[lbn]; + size = sblksize(&sblock, (off_t)dp1->di_size, lbn); + } else { + lbn = lblkno(&sblock, dp2->di_size); + off = blkoff(&sblock, dp2->di_size); + blk = dp2->di_db[lbn]; + size = sblksize(&sblock, (off_t)dp2->di_size, lbn); + } + if (off != 0) { + if (dir_extend(blk, nblk, off, ino) == -1) + return (-1); + } else { + blk = 0; + if (dir_insert(nblk, 0, ino) == -1) + return (-1); + } + if (sblock.fs_magic == FS_UFS1_MAGIC) { + dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE; + dp1->di_db[lbn] = nblk; + dp1->di_size = lblktosize(&sblock, lbn+1); + } else { + dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE; + dp2->di_db[lbn] = nblk; + dp2->di_size = lblktosize(&sblock, lbn+1); + } + if (putino(&disk) < 0) { + warn("Failed to write root inode"); + return (-1); + } + if (cgwrite(&disk) < 0) { + warn("Failed to write updated cg"); + sbdirty(); + return (-1); + } + if (blk) { + if (cgbfree(&disk, blk, size) < 0) { + warn("Failed to write cg"); + return (-1); + } + } + + return (0); +} + static int indir_fill(ufs2_daddr_t blk, int level, int *resid) { @@ -496,22 +714,20 @@ indir_fill(ufs2_daddr_t blk, int level, 
ufs1_daddr_t *bap1; ufs2_daddr_t *bap2; ufs2_daddr_t nblk; - struct fs *fs; int ncnt; int cnt; int i; - fs = &disk.d_fs; bzero(indirbuf, sizeof(indirbuf)); bap1 = (ufs1_daddr_t *)indirbuf; bap2 = (void *)bap1; cnt = 0; - for (i = 0; i < NINDIR(fs) && *resid != 0; i++) { + for (i = 0; i < NINDIR(&sblock) && *resid != 0; i++) { nblk = journal_balloc(); if (nblk <= 0) return (-1); cnt++; - if (fs->fs_magic == FS_UFS1_MAGIC) + if (sblock.fs_magic == FS_UFS1_MAGIC) *bap1++ = nblk; else *bap2++ = nblk; @@ -523,13 +739,47 @@ indir_fill(ufs2_daddr_t blk, int level, } else (*resid)--; } - if (bwrite(&disk, fsbtodb(fs, blk), indirbuf, fs->fs_bsize) <= 0) { + if (bwrite(&disk, fsbtodb(&sblock, blk), indirbuf, + sblock.fs_bsize) <= 0) { warn("Failed to write indirect"); return (-1); } return (cnt); } +/* + * Clear the flag bits so the journal can be removed. + */ +void +journal_clear(void) +{ + struct ufs1_dinode *dp1; + struct ufs2_dinode *dp2; + ino_t ino; + int mode; + void *ip; + + ino = journal_findfile(); + if (ino <= 0) { + warnx("Journal file does not exist"); + return; + } + if (getino(&disk, &ip, ino, &mode) != 0) { + warn("Failed to get journal inode"); + return; + } + dp2 = ip; + dp1 = ip; + if (sblock.fs_magic == FS_UFS1_MAGIC) + dp1->di_flags = 0; + else + dp2->di_flags = 0; + if (putino(&disk) < 0) { + warn("Failed to write journal inode"); + return; + } +} + int journal_alloc(int64_t size) { @@ -538,32 +788,39 @@ journal_alloc(int64_t size) ufs2_daddr_t blk; void *ip; struct cg *cgp; - struct fs *fs; int resid; ino_t ino; int blks; int mode; int i; - fs = &disk.d_fs; cgp = &disk.d_cg; ino = 0; /* + * If the journal file exists we can't allocate it. + */ + ino = journal_findfile(); + if (ino > 0) + warnx("Journal file %s already exists, please remove.", + SUJ_FILE); + if (ino != 0) + return (-1); + /* * If the user didn't supply a size pick one based on the filesystem * size constrained with hardcoded MIN and MAX values. 
We opt for * 1/1024th of the filesystem up to MAX but not exceeding one CG and * not less than the MIN. */ if (size == 0) { - size = (fs->fs_size * fs->fs_bsize) / 1024; + size = (sblock.fs_size * sblock.fs_bsize) / 1024; size = MIN(SUJ_MAX, size); - if (size / fs->fs_fsize > fs->fs_fpg) - size = fs->fs_fpg * fs->fs_fsize; + if (size / sblock.fs_fsize > sblock.fs_fpg) + size = sblock.fs_fpg * sblock.fs_fsize; size = MAX(SUJ_MIN, size); } - resid = blocks = size / fs->fs_bsize; - if (fs->fs_cstotal.cs_nbfree < blocks) { + resid = blocks = size / sblock.fs_bsize; + if (sblock.fs_cstotal.cs_nbfree < blocks) { warn("Insufficient free space for %jd byte journal", size); return (-1); } @@ -576,9 +833,9 @@ journal_alloc(int64_t size) continue; /* * Try to minimize fragmentation by requiring at least a - * 1/8th of the blocks be present in each cg we use. + * 1/16th of the blocks be present in each cg we use. */ - if (cgp->cg_cs.cs_nbfree < blocks / 8) + if (cgp->cg_cs.cs_nbfree < blocks / 16) continue; ino = cgialloc(&disk); if (ino <= 0) @@ -597,22 +854,24 @@ journal_alloc(int64_t size) */ dp2 = ip; dp1 = ip; - if (fs->fs_magic == FS_UFS1_MAGIC) { + if (sblock.fs_magic == FS_UFS1_MAGIC) { bzero(dp1, sizeof(*dp1)); dp1->di_size = size; - dp1->di_mode = IFREG; + dp1->di_mode = IFREG | IREAD; dp1->di_nlink = 1; + dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK; } else { bzero(dp2, sizeof(*dp2)); dp2->di_size = size; - dp2->di_mode = IFREG; + dp2->di_mode = IFREG | IREAD; dp2->di_nlink = 1; + dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK; } for (i = 0; i < NDADDR && resid; i++, resid--) { blk = journal_balloc(); if (blk <= 0) goto out; - if (fs->fs_magic == FS_UFS1_MAGIC) { + if (sblock.fs_magic == FS_UFS1_MAGIC) { dp1->di_db[i] = blk; dp1->di_blocks++; } else { @@ -629,7 +888,7 @@ journal_alloc(int64_t size) sbdirty(); goto out; } - if (fs->fs_magic == FS_UFS1_MAGIC) { + if (sblock.fs_magic == FS_UFS1_MAGIC) { dp1->di_ib[i] = blk; dp1->di_blocks += blks; } else { @@ -637,10 +896,10 
@@ journal_alloc(int64_t size) dp2->di_blocks += blks; } } - if (fs->fs_magic == FS_UFS1_MAGIC) - dp1->di_blocks *= fs->fs_bsize / disk.d_bsize; + if (sblock.fs_magic == FS_UFS1_MAGIC) + dp1->di_blocks *= sblock.fs_bsize / disk.d_bsize; else - dp2->di_blocks *= fs->fs_bsize / disk.d_bsize; + dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize; if (putino(&disk) < 0) { warn("Failed to write inode"); sbdirty(); @@ -651,8 +910,11 @@ journal_alloc(int64_t size) sbdirty(); return (-1); } - fs->fs_sujournal = ino; - fs->fs_sujfree = 0; + if (journal_insertfile(ino) < 0) { + sbdirty(); + return (-1); + } + sblock.fs_sujfree = 0; return (0); } warnx("Insufficient contiguous free space for the journal."); Modified: projects/suj/6/sys/sys/mount.h ============================================================================== --- projects/suj/6/sys/sys/mount.h Tue Jan 26 06:36:10 2010 (r203012) +++ projects/suj/6/sys/sys/mount.h Tue Jan 26 06:45:38 2010 (r203013) @@ -231,7 +231,6 @@ void __mnt_vnode_markerfree(str #define MNT_NOATIME 0x10000000 /* disable update of file access time */ #define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */ #define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */ -#define MNT_SUJ 0x00000080 /* softdep journaling */ /* * NFS export related mount flags. @@ -267,7 +266,7 @@ void __mnt_vnode_markerfree(str MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP | \ MNT_IGNORE | MNT_EXPUBLIC | MNT_NOSYMFOLLOW | \ - MNT_JAILDEVFS | MNT_MULTILABEL | MNT_ACLS | MNT_SUJ) + MNT_JAILDEVFS | MNT_MULTILABEL | MNT_ACLS) /* Mask of flags that can be updated. */ #define MNT_UPDATEMASK (MNT_NOSUID | MNT_NOEXEC | \ @@ -303,6 +302,7 @@ void __mnt_vnode_markerfree(str * with the unmount attempt (used by NFS). 
*/ #define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */ +#define MNTK_SUJ 0x00000100 /* Softdep journaling enabled */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_SUSPEND 0x08000000 /* request write suspension */ Modified: projects/suj/6/sys/ufs/ffs/ffs_alloc.c ============================================================================== --- projects/suj/6/sys/ufs/ffs/ffs_alloc.c Tue Jan 26 06:36:10 2010 (r203012) +++ projects/suj/6/sys/ufs/ffs/ffs_alloc.c Tue Jan 26 06:45:38 2010 (r203013) @@ -1837,6 +1837,7 @@ ffs_blkfree(ump, fs, devvp, bno, size, i ino_t inum; struct workhead *dephd; { + struct mount *mp; struct cg *cgp; struct buf *bp; ufs1_daddr_t fragno, cgbno; @@ -1951,7 +1952,8 @@ ffs_blkfree(ump, fs, devvp, bno, size, i fs->fs_fmod = 1; ACTIVECLEAR(fs, cg); UFS_UNLOCK(ump); - if (UFSTOVFS(ump)->mnt_flag & MNT_SOFTDEP) + mp = UFSTOVFS(ump); + if (mp->mnt_flag & MNT_SOFTDEP) softdep_setup_blkfree(UFSTOVFS(ump), bp, bno, numfrags(fs, size), dephd); bdwrite(bp); Modified: projects/suj/6/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- projects/suj/6/sys/ufs/ffs/ffs_softdep.c Tue Jan 26 06:36:10 2010 (r203012) +++ projects/suj/6/sys/ufs/ffs/ffs_softdep.c Tue Jan 26 06:45:38 2010 (r203013) @@ -1879,7 +1879,7 @@ softdep_unmount(mp) struct mount *mp; { - if (mp->mnt_flag & MNT_SUJ) + if (mp->mnt_kern_flag & MNTK_SUJ) journal_unmount(mp); } @@ -2021,16 +2021,36 @@ journal_mount(mp, fs, cred) struct fs *fs; struct ucred *cred; { + struct componentname cnp; struct jblocks *jblocks; + struct vnode *dvp; struct vnode *vp; struct inode *ip; ufs2_daddr_t blkno; + ino_t sujournal; int bcount; int error; int i; - mp->mnt_flag |= MNT_SUJ; - error = VFS_VGET(mp, fs->fs_sujournal, LK_EXCLUSIVE, &vp); + mp->mnt_kern_flag |= MNTK_SUJ; + error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp); + if (error) + return 
(error); + bzero(&cnp, sizeof(cnp)); + cnp.cn_nameiop = LOOKUP; + cnp.cn_flags = ISLASTCN; + cnp.cn_thread = curthread; + cnp.cn_cred = curthread->td_ucred; + cnp.cn_pnbuf = SUJ_FILE; + cnp.cn_nameptr = SUJ_FILE; + cnp.cn_namelen = strlen(SUJ_FILE); + error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal); + vput(dvp); + if (error != 0) { + printf("Failed to find journal. Use tunefs to create one\n"); + return (error); + } + error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, &vp); if (error) return (error); ip = VTOI(vp); @@ -2052,9 +2072,18 @@ journal_mount(mp, fs, cred) } jblocks->jb_low = jblocks->jb_free / 3; /* Reserve 33%. */ jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */ - DIP_SET(ip, i_modrev, fs->fs_mtime); - ip->i_flags |= IN_MODIFIED; - ffs_update(vp, 1); + /* + * Only validate the journal contents if the filesystem is clean, + * otherwise we write the logs but they'll never be used. If the + * filesystem was still dirty when we mounted it the journal is + * invalid and a new journal can only be valid if it starts from a + * clean mount. + */ + if (fs->fs_clean) { + DIP_SET(ip, i_modrev, fs->fs_mtime); + ip->i_flags |= IN_MODIFIED; + ffs_update(vp, 1); + } VFSTOUFS(mp)->softdep_jblocks = jblocks; out: vput(vp); @@ -2136,6 +2165,11 @@ remove_from_journal(wk) ump->softdep_on_journal -= 1; } +/* + * Check for journal space as well as dependency limits so the prelink + * code can throttle both journaled and non-journaled filesystems. + * Threshold is 0 for low and 1 for min. + */ static int journal_space(ump, thresh) struct ufsmount *ump; @@ -2144,7 +2178,20 @@ journal_space(ump, thresh) struct jblocks *jblocks; int avail; + /* + * We use a tighter restriction here to prevent request_cleanup() + * running in threads from running into locks we currently hold. 
+ */ + if (num_inodedep > (max_softdeps / 10) * 9) + return (0); + jblocks = ump->softdep_jblocks; + if (jblocks == NULL) + return (1); + if (thresh) + thresh = jblocks->jb_min; + else + thresh = jblocks->jb_low; avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE; avail = jblocks->jb_free - avail; @@ -2187,15 +2234,13 @@ softdep_prealloc(vp, waitok) struct vnode *vp; int waitok; { - struct jblocks *jblocks; struct ufsmount *ump; if (DOINGSUJ(vp) == 0) return (0); ump = VFSTOUFS(vp->v_mount); - jblocks = ump->softdep_jblocks; ACQUIRE_LOCK(&lk); - if (journal_space(ump, jblocks->jb_low)) { + if (journal_space(ump, 0)) { FREE_LOCK(&lk); return (0); } @@ -2210,9 +2255,9 @@ softdep_prealloc(vp, waitok) ffs_syncvnode(vp, waitok); ACQUIRE_LOCK(&lk); process_removes(vp); - if (journal_space(ump, jblocks->jb_low) == 0) { + if (journal_space(ump, 0) == 0) { softdep_speedup(); - if (journal_space(ump, jblocks->jb_min) == 0) + if (journal_space(ump, 1) == 0) journal_suspend(ump); } FREE_LOCK(&lk); @@ -2220,18 +2265,22 @@ softdep_prealloc(vp, waitok) return (0); } +/* + * Before adjusting a link count on a vnode verify that we have sufficient + * journal space. If not, process operations that depend on the currently + * locked pair of vnodes to try to flush space as the syncer, buf daemon, + * and softdep flush threads can not acquire these locks to reclaim space. + */ static void softdep_prelink(dvp, vp) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201001260645.o0Q6jdqC007768>
