Skip site navigation (1) | Skip section navigation (2)
Date:      Sat, 29 Apr 2023 18:01:54 GMT
From:      Kirk McKusick <mckusick@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 427b3d970ad9 - stable/13 - Improvement in UFS/FFS directory placement when doing mkdir(2).
Message-ID:  <202304291801.33TI1sUW001503@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by mckusick:

URL: https://cgit.FreeBSD.org/src/commit/?id=427b3d970ad9f255e5c72e98f6d8eb62baf8dcea

commit 427b3d970ad9f255e5c72e98f6d8eb62baf8dcea
Author:     Kirk McKusick <mckusick@FreeBSD.org>
AuthorDate: 2023-03-30 04:09:39 +0000
Commit:     Kirk McKusick <mckusick@FreeBSD.org>
CommitDate: 2023-04-29 18:01:47 +0000

    Improvement in UFS/FFS directory placement when doing mkdir(2).
    
    Sponsored by: The FreeBSD Foundation
    Differential Revision: https://reviews.freebsd.org/D39246
    
    (cherry picked from commit fe5e6e2cc5d6f2e4121eccdb3a8ceba646aef2c9)
---
 sbin/fsck_ffs/dir.c       | 107 +++++++++++++++++++++++++++++++++++++++++++---
 sbin/fsck_ffs/fsck.h      |   4 +-
 sbin/fsck_ffs/inode.c     |   1 +
 sbin/fsck_ffs/pass1.c     |  11 ++---
 sbin/fsck_ffs/pass2.c     |  13 ++++--
 sbin/fsck_ffs/pass3.c     |   4 +-
 sbin/fsdb/fsdb.c          |   2 +-
 sbin/newfs/mkfs.c         |  10 +++--
 sys/ufs/ffs/ffs_alloc.c   |  73 +++++++++++++++++++------------
 sys/ufs/ffs/ffs_softdep.c |  26 ++++++-----
 sys/ufs/ufs/dinode.h      |  10 ++++-
 sys/ufs/ufs/ufs_vnops.c   |   5 +++
 12 files changed, 204 insertions(+), 62 deletions(-)

diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c
index 64e477c66ed8..cc5305c390a4 100644
--- a/sbin/fsck_ffs/dir.c
+++ b/sbin/fsck_ffs/dir.c
@@ -88,12 +88,97 @@ propagate(void)
 			if (inoinfo(inp->i_parent)->ino_state == DFOUND &&
 			    INO_IS_DUNFOUND(inp->i_number)) {
 				inoinfo(inp->i_number)->ino_state = DFOUND;
+				check_dirdepth(inp);
 				change++;
 			}
 		}
 	} while (change > 0);
 }
 
+/*
+ * Check that the recorded depth of the directory is correct.
+ */
+void
+check_dirdepth(struct inoinfo *inp)
+{
+	struct inoinfo *parentinp;
+	struct inode ip;
+	union dinode *dp;
+	int saveresolved;
+	static int updateasked, dirdepthupdate;
+
+	if ((parentinp = getinoinfo(inp->i_parent)) == NULL) {
+		pfatal("check_dirdepth: UNKNOWN PARENT DIR");
+		return;
+	}
+	/*
+	 * If depth is correct, nothing to do.
+	 */
+	if (parentinp->i_depth + 1 == inp->i_depth)
+		return;
+	/*
+	 * Only the root inode should have depth of 0, so if any other
+	 * directory has a depth of 0 then this is an old filesystem
+	 * that has not been tracking directory depth. Ask just once
+	 * whether it should start tracking directory depth.
+	 */
+	if (inp->i_depth == 0 && updateasked == 0) {
+		updateasked = 1;
+		if (preen) {
+			pwarn("UPDATING FILESYSTEM TO TRACK DIRECTORY DEPTH");
+			dirdepthupdate = 1;
+		} else {
+			/*
+			 * The file system can be marked clean even if
+			 * a directory does not have the right depth.
+			 * Hence, resolved should not be cleared when
+			 * the filesystem does not update directory depths.
+			 */
+			saveresolved = resolved;
+			dirdepthupdate =
+			    reply("UPDATE FILESYSTEM TO TRACK DIRECTORY DEPTH");
+			resolved = saveresolved;
+		}
+	}
+	/*
+	 * If we are not converting, nothing more to do.
+	 */
+	if (inp->i_depth == 0 && dirdepthupdate == 0)
+		return;
+	/*
+	 * Individual directory at wrong depth. Report it and correct if
+	 * in preen mode or ask if in interactive mode. Note that if a
+	 * directory is renamed to a new location that is at a different
+	 * level in the tree, its depth will be recalculated, but none of
+	 * the directories that it contains will be updated. Thus it is
+	 * not unexpected to find directories with incorrect depths. No
+	 * operational harm will come from this though new directory
+	 * placement in the subtree may not be as optimal until the depths
+	 * of the affected directories are corrected.
+	 *
+	 * To avoid much spurious output on otherwise clean filesystems
+	 * we only generate detailed output when the debug flag is given.
+	 */
+	ginode(inp->i_number, &ip);
+	dp = ip.i_dp;
+	if (inp->i_depth != 0 && debug) {
+		pwarn("DIRECTORY");
+		prtinode(&ip);
+		printf(" DEPTH %d SHOULD BE %d", inp->i_depth,
+		    parentinp->i_depth + 1);
+		if (preen == 0 && reply("ADJUST") == 0) {
+			irelse(&ip);
+			return;
+		}
+		if (preen)
+			printf(" (ADJUSTED)\n");
+	}
+	inp->i_depth = parentinp->i_depth + 1;
+	DIP_SET(dp, di_dirdepth, inp->i_depth);
+	inodirty(&ip);
+	irelse(&ip);
+}
+
 /*
  * Scan each entry in a directory block.
  */
@@ -471,7 +556,7 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
 {
 	struct inode ip;
 	union dinode *dp;
-	int lostdir;
+	int lostdir, depth;
 	ino_t oldlfdir;
 	struct inoinfo *inp;
 	struct inodesc idesc;
@@ -546,7 +631,7 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
 			irelse(&ip);
 			return (0);
 		}
-		if ((changeino(UFS_ROOTINO, lfname, lfdir) & ALTERED) == 0) {
+		if ((changeino(UFS_ROOTINO, lfname, lfdir, 1) & ALTERED) == 0) {
 			pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n");
 			irelse(&ip);
 			return (0);
@@ -575,7 +660,8 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
 	}
 	inoinfo(orphan)->ino_linkcnt--;
 	if (lostdir) {
-		if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 &&
+		depth = DIP(dp, di_dirdepth) + 1;
+		if ((changeino(orphan, "..", lfdir, depth) & ALTERED) == 0 &&
 		    parentdir != (ino_t)-1)
 			(void)makeentry(orphan, lfdir, "..");
 		DIP_SET(dp, di_nlink, DIP(dp, di_nlink) + 1);
@@ -607,7 +693,7 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
  * fix an entry in a directory.
  */
 int
-changeino(ino_t dir, const char *name, ino_t newnum)
+changeino(ino_t dir, const char *name, ino_t newnum, int depth)
 {
 	struct inodesc idesc;
 	struct inode ip;
@@ -621,7 +707,10 @@ changeino(ino_t dir, const char *name, ino_t newnum)
 	idesc.id_name = strdup(name);
 	idesc.id_parent = newnum;	/* new value for name */
 	ginode(dir, &ip);
-	error = ckinode(ip.i_dp, &idesc);
+	if (((error = ckinode(ip.i_dp, &idesc)) & ALTERED) && newnum != 0) {
+		DIP_SET(ip.i_dp, di_dirdepth, depth);
+		getinoinfo(dir)->i_depth = depth;
+	}
 	free(idesc.id_name);
 	irelse(&ip);
 	return (error);
@@ -815,8 +904,8 @@ allocdir(ino_t parent, ino_t request, int mode)
 	struct inode ip;
 	union dinode *dp;
 	struct bufarea *bp;
-	struct inoinfo *inp;
 	struct dirtemplate *dirp;
+	struct inoinfo *inp, *parentinp;
 
 	ino = allocino(request, IFDIR|mode);
 	if (ino == 0)
@@ -859,6 +948,12 @@ allocdir(ino_t parent, ino_t request, int mode)
 	inp->i_parent = parent;
 	inp->i_dotdot = parent;
 	inp->i_flags |= INFO_NEW;
+	if ((parentinp = getinoinfo(inp->i_parent)) == NULL) {
+		pfatal("allocdir: UNKNOWN PARENT DIR");
+	} else {
+		inp->i_depth = parentinp->i_depth + 1; 
+		DIP_SET(dp, di_dirdepth, inp->i_depth);
+	}
 	inoinfo(ino)->ino_type = DT_DIR;
 	inoinfo(ino)->ino_state = inoinfo(parent)->ino_state;
 	if (inoinfo(ino)->ino_state == DSTATE) {
diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h
index 94ec59004b86..5ce5ca9e29a5 100644
--- a/sbin/fsck_ffs/fsck.h
+++ b/sbin/fsck_ffs/fsck.h
@@ -309,6 +309,7 @@ struct inoinfo {
 	ino_t	i_parent;		/* inode number of parent */
 	ino_t	i_dotdot;		/* inode number of `..' */
 	size_t	i_isize;		/* size of inode */
+	u_int	i_depth;		/* depth of directory from root */
 	u_int	i_flags;		/* flags, see below */
 	u_int	i_numblks;		/* size of block array in bytes */
 	ufs2_daddr_t i_blks[1];		/* actually longer */
@@ -463,9 +464,10 @@ void		catch(int);
 void		catchquit(int);
 void		cgdirty(struct bufarea *);
 struct bufarea *cglookup(int cg);
-int		changeino(ino_t dir, const char *name, ino_t newnum);
+int		changeino(ino_t dir, const char *name, ino_t newnum, int depth);
 void		check_blkcnt(struct inode *ip);
 int		check_cgmagic(int cg, struct bufarea *cgbp, int requestrebuild);
+void		check_dirdepth(struct inoinfo *inp);
 int		chkrange(ufs2_daddr_t blk, int cnt);
 void		ckfini(int markclean);
 int		ckinode(union dinode *dp, struct inodesc *);
diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c
index 947e5e0cbc08..a46fea0607a0 100644
--- a/sbin/fsck_ffs/inode.c
+++ b/sbin/fsck_ffs/inode.c
@@ -1135,6 +1135,7 @@ cacheino(union dinode *dp, ino_t inumber)
 	inp->i_dotdot = (ino_t)0;
 	inp->i_number = inumber;
 	inp->i_isize = DIP(dp, di_size);
+	inp->i_depth = DIP(dp, di_dirdepth);
 	inp->i_numblks = blks;
 	for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
 		inp->i_blks[i] = DIP(dp, di_db[i]);
diff --git a/sbin/fsck_ffs/pass1.c b/sbin/fsck_ffs/pass1.c
index 5f1ad8ecb686..afe9f7f2b789 100644
--- a/sbin/fsck_ffs/pass1.c
+++ b/sbin/fsck_ffs/pass1.c
@@ -388,14 +388,15 @@ checkinode(ino_t inumber, struct inodesc *idesc, int rebuildcg)
 	n_files++;
 	inoinfo(inumber)->ino_linkcnt = DIP(dp, di_nlink);
 	if (mode == IFDIR) {
-		if (DIP(dp, di_size) == 0)
+		if (DIP(dp, di_size) == 0) {
 			inoinfo(inumber)->ino_state = DCLEAR;
-		else if (DIP(dp, di_nlink) <= 0)
+		} else if (DIP(dp, di_nlink) <= 0) {
 			inoinfo(inumber)->ino_state = DZLINK;
-		else
+		} else {
 			inoinfo(inumber)->ino_state = DSTATE;
-		cacheino(dp, inumber);
-		countdirs++;
+			cacheino(dp, inumber);
+			countdirs++;
+		}
 	} else if (DIP(dp, di_nlink) <= 0)
 		inoinfo(inumber)->ino_state = FZLINK;
 	else
diff --git a/sbin/fsck_ffs/pass2.c b/sbin/fsck_ffs/pass2.c
index abe14549e6f4..8200209cc03e 100644
--- a/sbin/fsck_ffs/pass2.c
+++ b/sbin/fsck_ffs/pass2.c
@@ -210,8 +210,10 @@ pass2(void)
 		if (inp->i_parent == 0 || inp->i_isize == 0)
 			continue;
 		if (inoinfo(inp->i_parent)->ino_state == DFOUND &&
-		    INO_IS_DUNFOUND(inp->i_number))
+		    INO_IS_DUNFOUND(inp->i_number)) {
 			inoinfo(inp->i_number)->ino_state = DFOUND;
+			check_dirdepth(inp);
+		}
 		if (inp->i_dotdot == inp->i_parent ||
 		    inp->i_dotdot == (ino_t)-1)
 			continue;
@@ -271,7 +273,8 @@ pass2(void)
 		inoinfo(inp->i_dotdot)->ino_linkcnt++;
 		inoinfo(inp->i_parent)->ino_linkcnt--;
 		inp->i_dotdot = inp->i_parent;
-		(void)changeino(inp->i_number, "..", inp->i_parent);
+		(void)changeino(inp->i_number, "..", inp->i_parent,
+		    getinoinfo(inp->i_parent)->i_depth  + 1);
 	}
 	/*
 	 * Mark all the directories that can be found from the root.
@@ -548,10 +551,12 @@ again:
 		case DFOUND:
 			inp = getinoinfo(dirp->d_ino);
 			if (idesc->id_entryno > 2) {
-				if (inp->i_parent == 0)
+				if (inp->i_parent == 0) {
 					inp->i_parent = idesc->id_number;
-				else if ((n = fix_extraneous(inp, idesc)) == 1)
+					check_dirdepth(inp);
+				} else if ((n = fix_extraneous(inp, idesc))) {
 					break;
+				}
 			}
 			/* FALLTHROUGH */
 
diff --git a/sbin/fsck_ffs/pass3.c b/sbin/fsck_ffs/pass3.c
index 22cb0393905b..b9d24f18371b 100644
--- a/sbin/fsck_ffs/pass3.c
+++ b/sbin/fsck_ffs/pass3.c
@@ -74,7 +74,7 @@ pass3(void)
 		if (inp->i_number == UFS_ROOTINO ||
 		    (inp->i_parent != 0 && !S_IS_DUNFOUND(state)))
 			continue;
-		if (state == DCLEAR)
+		if (state == DCLEAR || state == DZLINK)
 			continue;
 		/*
 		 * If we are running with soft updates and we come
@@ -102,6 +102,7 @@ pass3(void)
 				inoinfo(lfdir)->ino_linkcnt--;
 			}
 			inoinfo(orphan)->ino_state = DFOUND;
+			check_dirdepth(inp);
 			propagate();
 			continue;
 		}
@@ -127,6 +128,7 @@ pass3(void)
 		}
 		irelse(&ip);
 		inoinfo(orphan)->ino_state = DFOUND;
+		check_dirdepth(inp);
 		propagate();
 	}
 }
diff --git a/sbin/fsdb/fsdb.c b/sbin/fsdb/fsdb.c
index c935f88952b4..4d66194bf86d 100644
--- a/sbin/fsdb/fsdb.c
+++ b/sbin/fsdb/fsdb.c
@@ -781,7 +781,7 @@ CMDFUNCSTART(rm)
 
     if (!checkactivedir())
 	return 1;
-    rval = changeino(curinum, argv[1], 0);
+    rval = changeino(curinum, argv[1], 0, 0);
     if (rval & ALTERED) {
 	printf("Name `%s' removed\n", argv[1]);
 	return 0;
diff --git a/sbin/newfs/mkfs.c b/sbin/newfs/mkfs.c
index 6fc6a2475075..3d19fc88fd79 100644
--- a/sbin/newfs/mkfs.c
+++ b/sbin/newfs/mkfs.c
@@ -912,8 +912,9 @@ fsinit(time_t utime)
 				    alloc(sblock.fs_fsize, node.dp1.di_mode);
 			node.dp1.di_blocks =
 			    btodb(fragroundup(&sblock, node.dp1.di_size));
-				wtfs(fsbtodb(&sblock, node.dp1.di_db[0]),
-				    sblock.fs_fsize, iobuf);
+			node.dp1.di_dirdepth = 1;
+			wtfs(fsbtodb(&sblock, node.dp1.di_db[0]),
+			    sblock.fs_fsize, iobuf);
 			iput(&node, UFS_ROOTINO + 1);
 		}
 	} else {
@@ -948,8 +949,9 @@ fsinit(time_t utime)
 				    alloc(sblock.fs_fsize, node.dp2.di_mode);
 			node.dp2.di_blocks =
 			    btodb(fragroundup(&sblock, node.dp2.di_size));
-				wtfs(fsbtodb(&sblock, node.dp2.di_db[0]), 
-				    sblock.fs_fsize, iobuf);
+			node.dp2.di_dirdepth = 1;
+			wtfs(fsbtodb(&sblock, node.dp2.di_db[0]), 
+			    sblock.fs_fsize, iobuf);
 			iput(&node, UFS_ROOTINO + 1);
 		}
 	}
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 4b0c7b108cb6..6d37afcfadf6 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1179,6 +1179,8 @@ retry:
 	}
 	ip->i_flags = 0;
 	DIP_SET(ip, i_flags, 0);
+	if ((mode & IFMT) == IFDIR)
+		DIP_SET(ip, i_dirdepth, DIP(pip, i_dirdepth) + 1);
 	/*
 	 * Set up a new generation number for this inode.
 	 */
@@ -1238,10 +1240,10 @@ static ino_t
 ffs_dirpref(struct inode *pip)
 {
 	struct fs *fs;
-	int cg, prefcg, dirsize, cgsize;
+	int cg, prefcg, curcg, dirsize, cgsize;
+	int depth, range, start, end, numdirs, power, numerator, denominator;
 	u_int avgifree, avgbfree, avgndir, curdirsize;
 	u_int minifree, minbfree, maxndir;
-	u_int mincg, minndir;
 	u_int maxcontigdirs;
 
 	mtx_assert(UFS_MTX(ITOUMP(pip)), MA_OWNED);
@@ -1252,35 +1254,53 @@ ffs_dirpref(struct inode *pip)
 	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
 
 	/*
-	 * Force allocation in another cg if creating a first level dir.
-	 */
-	ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
-	if (ITOV(pip)->v_vflag & VV_ROOT) {
-		prefcg = arc4random() % fs->fs_ncg;
-		mincg = prefcg;
-		minndir = fs->fs_ipg;
-		for (cg = prefcg; cg < fs->fs_ncg; cg++)
-			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
-			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
-			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
-				mincg = cg;
-				minndir = fs->fs_cs(fs, cg).cs_ndir;
-			}
-		for (cg = 0; cg < prefcg; cg++)
-			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
-			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
-			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
-				mincg = cg;
-				minndir = fs->fs_cs(fs, cg).cs_ndir;
-			}
-		return ((ino_t)(fs->fs_ipg * mincg));
-	}
+	 * Select a preferred cylinder group to place a new directory.
+	 * If we are near the root of the filesystem we aim to spread
+	 * them out as much as possible. As we descend deeper from the
+	 * root we cluster them closer together around their parent as
+	 * we expect them to be more closely interactive. Higher-level
+	 * directories like usr/src/sys and usr/src/bin should be
+	 * separated while the directories in these areas are more
+	 * likely to be accessed together so should be closer.
+	 *
+	 * We pick a range of cylinder groups around the cylinder group
+	 * of the directory in which we are being created. The size of
+	 * the range for our search is based on our depth from the root
+	 * of our filesystem. We then probe that range based on how many
+	 * directories are already present. The first new directory is at
+	 * 1/2 (middle) of the range; the second is in the first 1/4 of the
+	 * range, then at 3/4, 1/8, 3/8, 5/8, 7/8, 1/16, 3/16, 5/16, etc.
+	 */
+	depth = DIP(pip, i_dirdepth);
+	range = fs->fs_ncg / (1 << depth);
+	curcg = ino_to_cg(fs, pip->i_number);
+	start = curcg - (range / 2);
+	if (start < 0)
+		start += fs->fs_ncg;
+	end = curcg + (range / 2);
+	if (end >= fs->fs_ncg)
+		end -= fs->fs_ncg;
+	numdirs = pip->i_effnlink - 1;
+	power = fls(numdirs);
+	numerator = (numdirs & ~(1 << (power - 1))) * 2 + 1;
+	denominator = 1 << power;
+	prefcg = (curcg - (range / 2) + (range * numerator / denominator));
+	if (prefcg < 0)
+		prefcg += fs->fs_ncg;
+	if (prefcg >= fs->fs_ncg)
+		prefcg -= fs->fs_ncg;
+	/*
+	 * If this filesystem is not tracking directory depths,
+	 * revert to the old algorithm.
+	 */
+	if (depth == 0 && pip->i_number != UFS_ROOTINO)
+		prefcg = curcg;
 
 	/*
 	 * Count various limits which used for
 	 * optimal allocation of a directory inode.
 	 */
-	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
+	maxndir = min(avgndir + (1 << depth), fs->fs_ipg);
 	minifree = avgifree - avgifree / 4;
 	if (minifree < 1)
 		minifree = 1;
@@ -1324,7 +1344,6 @@ ffs_dirpref(struct inode *pip)
 	 * in new cylinder groups so finds every possible block after
 	 * one pass over the filesystem.
 	 */
-	prefcg = ino_to_cg(fs, pip->i_number);
 	for (cg = prefcg; cg < fs->fs_ncg; cg++)
 		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
 		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index d784d8d0fd1a..ff72dad97250 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -12485,17 +12485,6 @@ softdep_update_inodeblock(
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_update_inodeblock called on non-softdep filesystem"));
 	fs = ump->um_fs;
-	/*
-	 * Preserve the freelink that is on disk.  clear_unlinked_inodedep()
-	 * does not have access to the in-core ip so must write directly into
-	 * the inode block buffer when setting freelink.
-	 */
-	if (fs->fs_magic == FS_UFS1_MAGIC)
-		DIP_SET(ip, i_freelink, ((struct ufs1_dinode *)bp->b_data +
-		    ino_to_fsbo(fs, ip->i_number))->di_freelink);
-	else
-		DIP_SET(ip, i_freelink, ((struct ufs2_dinode *)bp->b_data +
-		    ino_to_fsbo(fs, ip->i_number))->di_freelink);
 	/*
 	 * If the effective link count is not equal to the actual link
 	 * count, then we must track the difference in an inodedep while
@@ -12511,6 +12500,21 @@ again:
 			panic("softdep_update_inodeblock: bad link count");
 		return;
 	}
+	/*
+	 * Preserve the freelink that is on disk.  clear_unlinked_inodedep()
+	 * does not have access to the in-core ip so must write directly into
+	 * the inode block buffer when setting freelink.
+	 */
+	if ((inodedep->id_state & UNLINKED) != 0) {
+		if (fs->fs_magic == FS_UFS1_MAGIC)
+			DIP_SET(ip, i_freelink,
+			    ((struct ufs1_dinode *)bp->b_data +
+			    ino_to_fsbo(fs, ip->i_number))->di_freelink);
+		else
+			DIP_SET(ip, i_freelink,
+			    ((struct ufs2_dinode *)bp->b_data +
+			    ino_to_fsbo(fs, ip->i_number))->di_freelink);
+	}
 	KASSERT(ip->i_nlink >= inodedep->id_nlinkdelta,
 	    ("softdep_update_inodeblock inconsistent ip %p i_nlink %d "
 	    "inodedep %p id_nlinkdelta %jd",
diff --git a/sys/ufs/ufs/dinode.h b/sys/ufs/ufs/dinode.h
index 840a4cc7d40f..e4a424abe2e6 100644
--- a/sys/ufs/ufs/dinode.h
+++ b/sys/ufs/ufs/dinode.h
@@ -156,7 +156,10 @@ struct ufs2_dinode {
 		    [(UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t)];
 	};
 	u_int64_t	di_modrev;	/* 232: i_modrev for NFSv4 */
-	uint32_t	di_freelink;	/* 240: SUJ: Next unlinked inode. */
+	union {
+		uint32_t di_freelink;	/* 240: SUJ: Next unlinked inode. */
+		uint32_t di_dirdepth;	/* 240: IFDIR: depth from root dir */
+	};
 	uint32_t	di_ckhash;	/* 244: if CK_INODE, its check-hash */
 	uint32_t	di_spare[2];	/* 248: Reserved; currently unused */
 };
@@ -179,7 +182,10 @@ struct ufs2_dinode {
 struct ufs1_dinode {
 	u_int16_t	di_mode;	/*   0: IFMT, permissions; see below. */
 	int16_t		di_nlink;	/*   2: File link count. */
-	uint32_t	di_freelink;	/*   4: SUJ: Next unlinked inode. */
+	union {
+		uint32_t di_freelink;	/*   4: SUJ: Next unlinked inode. */
+		uint32_t di_dirdepth;	/*   4: IFDIR: depth from root dir */
+	};
 	u_int64_t	di_size;	/*   8: File byte count. */
 	int32_t		di_atime;	/*  16: Last access time. */
 	int32_t		di_atimensec;	/*  20: Last access time. */
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 1ebb3597b925..985641b02378 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -1723,6 +1723,10 @@ relock:
 	 * and ".." set to point to the new parent.
 	 */
 	if (doingdirectory && newparent) {
+		/*
+		 * Set the directory depth based on its new parent.
+		 */
+		DIP_SET(fip, i_dirdepth, DIP(tdp, i_dirdepth) + 1);
 		/*
 		 * If tip exists we simply use its link, otherwise we must
 		 * add a new one.
@@ -2136,6 +2140,7 @@ ufs_mkdir(
 	ip->i_effnlink = 2;
 	ip->i_nlink = 2;
 	DIP_SET(ip, i_nlink, 2);
+	DIP_SET(ip, i_dirdepth, DIP(dp,i_dirdepth) + 1);
 
 	if (cnp->cn_flags & ISWHITEOUT) {
 		ip->i_flags |= UF_OPAQUE;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202304291801.33TI1sUW001503>