Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 21 Oct 2013 00:28:02 +0000 (UTC)
From:      Kirk McKusick <mckusick@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r256817 - in head/sys/ufs: ffs ufs
Message-ID:  <201310210028.r9L0S2DX081161@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mckusick
Date: Mon Oct 21 00:28:02 2013
New Revision: 256817
URL: http://svnweb.freebsd.org/changeset/base/256817

Log:
  Restructuring of the soft updates code to set it up so that the
  single kernel-wide soft update lock can be replaced with a
  per-filesystem soft-updates lock. This per-filesystem lock will
  allow each filesystem to have its own soft-updates flushing thread
  rather than being limited to a single soft-updates flushing thread
  for the entire kernel.
  
  Move soft update variables out of the ufsmount structure and into
  their own mount_softdeps structure referenced by ufsmount field
  um_softdep.  Eventually the per-filesystem lock will be in this
  structure. For now there is simply a pointer to the kernel-wide
  soft updates lock.
  
  Change all instances of ACQUIRE_LOCK and FREE_LOCK to pass the lock
  pointer in the mount_softdeps structure instead of a pointer to the
  kernel-wide soft-updates lock.
  
  Replace the five hash tables used by soft updates with per-filesystem
  copies of these tables allocated in the mount_softdeps structure.
  
  Several functions that flush dependencies when too many are allocated
  in the kernel used to operate across all filesystems. They are now
  parameterized to flush dependencies from a specified filesystem.
  For now, we stick with the round-robin flushing strategy when the
  kernel as a whole has too many dependencies allocated.
  
  While there are many lines of changes, there should be no functional
  change in the operation of soft updates.
  
  Tested by:    Peter Holm and Scott Long
  Sponsored by: Netflix

Modified:
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ffs/softdep.h
  head/sys/ufs/ufs/ufsmount.h

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	Mon Oct 21 00:04:26 2013	(r256816)
+++ head/sys/ufs/ffs/ffs_softdep.c	Mon Oct 21 00:28:02 2013	(r256817)
@@ -616,48 +616,6 @@ softdep_freework(wkhd)
 
 FEATURE(softupdates, "FFS soft-updates support");
 
-/*
- * These definitions need to be adapted to the system to which
- * this file is being ported.
- */
-
-#define M_SOFTDEP_FLAGS	(M_WAITOK)
-
-#define	D_PAGEDEP	0
-#define	D_INODEDEP	1
-#define	D_BMSAFEMAP	2
-#define	D_NEWBLK	3
-#define	D_ALLOCDIRECT	4
-#define	D_INDIRDEP	5
-#define	D_ALLOCINDIR	6
-#define	D_FREEFRAG	7
-#define	D_FREEBLKS	8
-#define	D_FREEFILE	9
-#define	D_DIRADD	10
-#define	D_MKDIR		11
-#define	D_DIRREM	12
-#define	D_NEWDIRBLK	13
-#define	D_FREEWORK	14
-#define	D_FREEDEP	15
-#define	D_JADDREF	16
-#define	D_JREMREF	17
-#define	D_JMVREF	18
-#define	D_JNEWBLK	19
-#define	D_JFREEBLK	20
-#define	D_JFREEFRAG	21
-#define	D_JSEG		22
-#define	D_JSEGDEP	23
-#define	D_SBDEP		24
-#define	D_JTRUNC	25
-#define	D_JFSYNC	26
-#define	D_SENTINEL	27
-#define	D_LAST		D_SENTINEL
-
-unsigned long dep_current[D_LAST + 1];
-unsigned long dep_highuse[D_LAST + 1];
-unsigned long dep_total[D_LAST + 1];
-unsigned long dep_write[D_LAST + 1];
-
 static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0,
     "soft updates stats");
 static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0,
@@ -669,6 +627,11 @@ static SYSCTL_NODE(_debug_softdep, OID_A
 static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0,
     "current dependencies written");
 
+unsigned long dep_current[D_LAST + 1];
+unsigned long dep_highuse[D_LAST + 1];
+unsigned long dep_total[D_LAST + 1];
+unsigned long dep_write[D_LAST + 1];
+
 #define	SOFTDEP_TYPE(type, str, long)					\
     static MALLOC_DEFINE(M_ ## type, #str, long);			\
     SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD,	\
@@ -713,6 +676,9 @@ static MALLOC_DEFINE(M_SENTINEL, "sentin
 
 static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes");
 static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations");
+static MALLOC_DEFINE(M_MOUNTDATA, "softdep", "Softdep per-mount data");
+
+#define M_SOFTDEP_FLAGS	(M_WAITOK)
 
 /* 
  * translate from workitem type to memory type
@@ -749,8 +715,6 @@ static struct malloc_type *memtype[] = {
 	M_SENTINEL
 };
 
-static LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;
-
 #define DtoM(type) (memtype[type])
 
 /*
@@ -766,51 +730,16 @@ static LIST_HEAD(mkdirlist, mkdir) mkdir
 #define	DOT_OFFSET	offsetof(struct dirtemplate, dot_ino)
 
 /*
- * Forward declarations.
- */
-struct inodedep_hashhead;
-struct newblk_hashhead;
-struct pagedep_hashhead;
-struct bmsafemap_hashhead;
-
-/*
- * Private journaling structures.
- */
-struct jblocks {
-	struct jseglst	jb_segs;	/* TAILQ of current segments. */
-	struct jseg	*jb_writeseg;	/* Next write to complete. */
-	struct jseg	*jb_oldestseg;	/* Oldest segment with valid entries. */
-	struct jextent	*jb_extent;	/* Extent array. */
-	uint64_t	jb_nextseq;	/* Next sequence number. */
-	uint64_t	jb_oldestwrseq;	/* Oldest written sequence number. */
-	uint8_t		jb_needseg;	/* Need a forced segment. */
-	uint8_t		jb_suspended;	/* Did journal suspend writes? */
-	int		jb_avail;	/* Available extents. */
-	int		jb_used;	/* Last used extent. */
-	int		jb_head;	/* Allocator head. */
-	int		jb_off;		/* Allocator extent offset. */
-	int		jb_blocks;	/* Total disk blocks covered. */
-	int		jb_free;	/* Total disk blocks free. */
-	int		jb_min;		/* Minimum free space. */
-	int		jb_low;		/* Low on space. */
-	int		jb_age;		/* Insertion time of oldest rec. */
-};
-
-struct jextent {
-	ufs2_daddr_t	je_daddr;	/* Disk block address. */
-	int		je_blocks;	/* Disk block count. */
-};
-
-/*
  * Internal function prototypes.
  */
+static	void check_clear_deps(struct mount *);
 static	void softdep_error(char *, int);
 static	int softdep_process_worklist(struct mount *, int);
 static	int softdep_waitidle(struct mount *);
 static	void drain_output(struct vnode *);
 static	struct buf *getdirtybuf(struct buf *, struct rwlock *, int);
-static	void clear_remove(void);
-static	void clear_inodedeps(void);
+static	void clear_remove(struct mount *);
+static	void clear_inodedeps(struct mount *);
 static	void unlinked_inodedep(struct mount *, struct inodedep *);
 static	void clear_unlinked_inodedep(struct inodedep *);
 static	struct inodedep *first_unlinked_inodedep(struct ufsmount *);
@@ -954,20 +883,20 @@ static	void allocdirect_merge(struct all
 	    struct allocdirect *, struct allocdirect *);
 static	struct freefrag *allocindir_merge(struct allocindir *,
 	    struct allocindir *);
-static	int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int,
+static	int bmsafemap_find(struct bmsafemap_hashhead *, int,
 	    struct bmsafemap **);
 static	struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *,
 	    int cg, struct bmsafemap *);
-static	int newblk_find(struct newblk_hashhead *, struct mount *, ufs2_daddr_t,
-	    int, struct newblk **);
+static	int newblk_find(struct newblk_hashhead *, ufs2_daddr_t, int,
+	    struct newblk **);
 static	int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **);
-static	int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
+static	int inodedep_find(struct inodedep_hashhead *, ino_t,
 	    struct inodedep **);
 static	int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
 static	int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
 	    int, struct pagedep **);
 static	int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
-	    struct mount *mp, int, struct pagedep **);
+	    struct pagedep **);
 static	void pause_timer(void *);
 static	int request_cleanup(struct mount *, int);
 static	int process_worklist_item(struct mount *, int, int);
@@ -982,9 +911,9 @@ static	void remove_from_worklist(struct 
 static	void softdep_flush(void);
 static	void softdep_flushjournal(struct mount *);
 static	int softdep_speedup(void);
-static	void worklist_speedup(void);
+static	void worklist_speedup(struct mount *);
 static	int journal_mount(struct mount *, struct fs *, struct ucred *);
-static	void journal_unmount(struct mount *);
+static	void journal_unmount(struct ufsmount *);
 static	int journal_space(struct ufsmount *, int);
 static	void journal_suspend(struct ufsmount *);
 static	int journal_unsuspend(struct ufsmount *ump);
@@ -1030,15 +959,25 @@ static	void softdep_disk_write_complete(
 static	void softdep_deallocate_dependencies(struct buf *);
 static	int softdep_count_dependencies(struct buf *bp, int);
 
+/*
+ * Global lock over all of soft updates.
+ */
 static struct rwlock lk;
 RW_SYSINIT(softdep_lock, &lk, "Softdep Lock");
 
-#define TRY_ACQUIRE_LOCK(lk)		rw_try_wlock(lk)
-#define ACQUIRE_LOCK(lk)		rw_wlock(lk)
-#define FREE_LOCK(lk)			rw_wunlock(lk)
+/*
+ * Allow per-filesystem soft-updates locking.
+ * For now all use the same global lock defined above.
+ */
+#define LOCK_PTR(ump)		((ump)->um_softdep->sd_fslock)
+#define TRY_ACQUIRE_LOCK(ump)	rw_try_wlock((ump)->um_softdep->sd_fslock)
+#define ACQUIRE_LOCK(ump)	rw_wlock((ump)->um_softdep->sd_fslock)
+#define FREE_LOCK(ump)		rw_wunlock((ump)->um_softdep->sd_fslock)
+#define LOCK_OWNED(ump)		rw_assert((ump)->um_softdep->sd_fslock, \
+				    RA_WLOCKED)
 
-#define	BUF_AREC(bp)			lockallowrecurse(&(bp)->b_lock)
-#define	BUF_NOREC(bp)			lockdisablerecurse(&(bp)->b_lock)
+#define	BUF_AREC(bp)		lockallowrecurse(&(bp)->b_lock)
+#define	BUF_NOREC(bp)		lockdisablerecurse(&(bp)->b_lock)
 
 /*
  * Worklist queue management.
@@ -1073,7 +1012,7 @@ worklist_insert(head, item, locked)
 {
 
 	if (locked)
-		rw_assert(&lk, RA_WLOCKED);
+		LOCK_OWNED(VFSTOUFS(item->wk_mp));
 	if (item->wk_state & ONWORKLIST)
 		panic("worklist_insert: %p %s(0x%X) already on list",
 		    item, TYPENAME(item->wk_type), item->wk_state);
@@ -1088,7 +1027,7 @@ worklist_remove(item, locked)
 {
 
 	if (locked)
-		rw_assert(&lk, RA_WLOCKED);
+		LOCK_OWNED(VFSTOUFS(item->wk_mp));
 	if ((item->wk_state & ONWORKLIST) == 0)
 		panic("worklist_remove: %p %s(0x%X) not on list",
 		    item, TYPENAME(item->wk_type), item->wk_state);
@@ -1161,7 +1100,6 @@ jwork_move(dst, src)
 			freedep = freedep_merge(WK_FREEDEP(wk), freedep);
 	}
 
-	rw_assert(&lk, RA_WLOCKED);
 	while ((wk = LIST_FIRST(src)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		WORKLIST_INSERT(dst, wk);
@@ -1216,7 +1154,6 @@ workitem_free(item, type)
 	int type;
 {
 	struct ufsmount *ump;
-	rw_assert(&lk, RA_WLOCKED);
 
 #ifdef DEBUG
 	if (item->wk_state & ONWORKLIST)
@@ -1229,6 +1166,7 @@ workitem_free(item, type)
 	if (item->wk_state & IOWAITING)
 		wakeup(item);
 	ump = VFSTOUFS(item->wk_mp);
+	LOCK_OWNED(ump);
 	KASSERT(ump->softdep_deps > 0,
 	    ("workitem_free: %s: softdep_deps going negative",
 	    ump->um_fs->fs_fsmnt));
@@ -1237,7 +1175,11 @@ workitem_free(item, type)
 	KASSERT(dep_current[item->wk_type] > 0,
 	    ("workitem_free: %s: dep_current[%s] going negative",
 	    ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+	KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+	    ("workitem_free: %s: softdep_curdeps[%s] going negative",
+	    ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
 	dep_current[item->wk_type]--;
+	ump->softdep_curdeps[item->wk_type] -= 1;
 	free(item, DtoM(type));
 }
 
@@ -1254,14 +1196,15 @@ workitem_alloc(item, type, mp)
 	item->wk_state = 0;
 
 	ump = VFSTOUFS(mp);
-	ACQUIRE_LOCK(&lk);
+	ACQUIRE_LOCK(ump);
 	dep_current[type]++;
 	if (dep_current[type] > dep_highuse[type])
 		dep_highuse[type] = dep_current[type];
 	dep_total[type]++;
+	ump->softdep_curdeps[type] += 1;
 	ump->softdep_deps++;
 	ump->softdep_accdeps++;
-	FREE_LOCK(&lk);
+	FREE_LOCK(ump);
 }
 
 static void
@@ -1269,7 +1212,15 @@ workitem_reassign(item, newtype)
 	struct worklist *item;
 	int newtype;
 {
+	struct ufsmount *ump;
 
+	ump = VFSTOUFS(item->wk_mp);
+	LOCK_OWNED(ump);
+	KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+	    ("workitem_reassign: %s: softdep_curdeps[%s] going negative",
+	    VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+	ump->softdep_curdeps[item->wk_type] -= 1;
+	ump->softdep_curdeps[newtype] += 1;
 	KASSERT(dep_current[item->wk_type] > 0,
 	    ("workitem_reassign: %s: dep_current[%s] going negative",
 	    VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
@@ -1290,7 +1241,8 @@ static int tickdelay = 2;	/* number of t
 static int proc_waiting;	/* tracks whether we have a timeout posted */
 static int *stat_countp;	/* statistic to count in proc_waiting timeout */
 static struct callout softdep_callout;
-static int req_pending;
+static struct mount *req_pending;
+#define ALLCLEAN ((struct mount *)-1)
 static int req_clear_inodedeps;	/* syncer process flush some inodedeps */
 static int req_clear_remove;	/* syncer process flush some freeblks */
 static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
@@ -1298,6 +1250,7 @@ static int softdep_flushcache = 0; /* Sh
 /*
  * runtime statistics
  */
+static int stat_softdep_mounts;	/* number of softdep mounted filesystems */
 static int stat_worklist_push;	/* number of worklist cleanups */
 static int stat_blk_limit_push;	/* number of times block limit neared */
 static int stat_ino_limit_push;	/* number of times inode limit neared */
@@ -1329,6 +1282,8 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, tic
     &tickdelay, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW,
     &maxindirdeps, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, softdep_mounts, CTLFLAG_RD,
+    &stat_softdep_mounts, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW,
     &stat_worklist_push, 0,"");
 SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW,
@@ -1382,13 +1337,10 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, flu
 
 SYSCTL_DECL(_vfs_ffs);
 
-LIST_HEAD(bmsafemap_hashhead, bmsafemap) *bmsafemap_hashtbl;
-static u_long	bmsafemap_hash;	/* size of hash table - 1 */
-
-static int compute_summary_at_mount = 0;	/* Whether to recompute the summary at mount time */
+/* Whether to recompute the summary at mount time */
+static int compute_summary_at_mount = 0;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW,
 	   &compute_summary_at_mount, 0, "Recompute summary at mount");
-
 static struct proc *softdepproc;
 static struct kproc_desc softdep_kp = {
 	"softdepflush",
@@ -1413,21 +1365,6 @@ softdep_flush(void)
 
 	for (;;) {	
 		kproc_suspend_check(softdepproc);
-		ACQUIRE_LOCK(&lk);
-		/*
-		 * If requested, try removing inode or removal dependencies.
-		 */
-		if (req_clear_inodedeps) {
-			clear_inodedeps();
-			req_clear_inodedeps -= 1;
-			wakeup_one(&proc_waiting);
-		}
-		if (req_clear_remove) {
-			clear_remove();
-			req_clear_remove -= 1;
-			wakeup_one(&proc_waiting);
-		}
-		FREE_LOCK(&lk);
 		remaining = progress = 0;
 		mtx_lock(&mountlist_mtx);
 		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp)  {
@@ -1436,8 +1373,8 @@ softdep_flush(void)
 				continue;
 			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
 				continue;
-			progress += softdep_process_worklist(mp, 0);
 			ump = VFSTOUFS(mp);
+			progress += softdep_process_worklist(mp, 0);
 			remaining += ump->softdep_on_worklist;
 			mtx_lock(&mountlist_mtx);
 			nmp = TAILQ_NEXT(mp, mnt_list);
@@ -1446,20 +1383,21 @@ softdep_flush(void)
 		mtx_unlock(&mountlist_mtx);
 		if (remaining && progress)
 			continue;
-		ACQUIRE_LOCK(&lk);
-		if (!req_pending)
+		rw_wlock(&lk);
+		if (req_pending == NULL)
 			msleep(&req_pending, &lk, PVM, "sdflush", hz);
-		req_pending = 0;
-		FREE_LOCK(&lk);
+		req_pending = NULL;
+		rw_wunlock(&lk);
 	}
 }
 
 static void
-worklist_speedup(void)
+worklist_speedup(mp)
+	struct mount *mp;
 {
 	rw_assert(&lk, RA_WLOCKED);
 	if (req_pending == 0) {
-		req_pending = 1;
+		req_pending = mp;
 		wakeup(&req_pending);
 	}
 }
@@ -1468,9 +1406,9 @@ static int
 softdep_speedup(void)
 {
 
-	worklist_speedup();
+	worklist_speedup(ALLCLEAN);
 	bd_speedup();
-	return speedup_syncer();
+	return (speedup_syncer());
 }
 
 /*
@@ -1491,8 +1429,8 @@ add_to_worklist(wk, flags)
 {
 	struct ufsmount *ump;
 
-	rw_assert(&lk, RA_WLOCKED);
 	ump = VFSTOUFS(wk->wk_mp);
+	LOCK_OWNED(ump);
 	if (wk->wk_state & ONWORKLIST)
 		panic("add_to_worklist: %s(0x%X) already on list",
 		    TYPENAME(wk->wk_type), wk->wk_state);
@@ -1508,7 +1446,7 @@ add_to_worklist(wk, flags)
 	}
 	ump->softdep_on_worklist += 1;
 	if (flags & WK_NODELAY)
-		worklist_speedup();
+		worklist_speedup(wk->wk_mp);
 }
 
 /*
@@ -1544,9 +1482,11 @@ wait_worklist(wk, wmesg)
 	struct worklist *wk;
 	char *wmesg;
 {
+	struct ufsmount *ump;
 
+	ump = VFSTOUFS(wk->wk_mp);
 	wk->wk_state |= IOWAITING;
-	msleep(wk, &lk, PVM, wmesg, 0);
+	msleep(wk, LOCK_PTR(ump), PVM, wmesg, 0);
 }
 
 /*
@@ -1568,54 +1508,41 @@ softdep_process_worklist(mp, full)
 	long starttime;
 
 	KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp"));
-	/*
-	 * Record the process identifier of our caller so that we can give
-	 * this process preferential treatment in request_cleanup below.
-	 */
+	if (MOUNTEDSOFTDEP(mp) == 0)
+		return (0);
 	matchcnt = 0;
 	ump = VFSTOUFS(mp);
-	ACQUIRE_LOCK(&lk);
+	ACQUIRE_LOCK(ump);
 	starttime = time_second;
-	softdep_process_journal(mp, NULL, full?MNT_WAIT:0);
+	softdep_process_journal(mp, NULL, full ? MNT_WAIT : 0);
+	check_clear_deps(mp);
 	while (ump->softdep_on_worklist > 0) {
 		if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0)
 			break;
 		else
 			matchcnt += cnt;
-		/*
-		 * If requested, try removing inode or removal dependencies.
-		 */
-		if (req_clear_inodedeps) {
-			clear_inodedeps();
-			req_clear_inodedeps -= 1;
-			wakeup_one(&proc_waiting);
-		}
-		if (req_clear_remove) {
-			clear_remove();
-			req_clear_remove -= 1;
-			wakeup_one(&proc_waiting);
-		}
+		check_clear_deps(mp);
 		/*
 		 * We do not generally want to stop for buffer space, but if
 		 * we are really being a buffer hog, we will stop and wait.
 		 */
 		if (should_yield()) {
-			FREE_LOCK(&lk);
+			FREE_LOCK(ump);
 			kern_yield(PRI_USER);
 			bwillwrite();
-			ACQUIRE_LOCK(&lk);
+			ACQUIRE_LOCK(ump);
 		}
 		/*
 		 * Never allow processing to run for more than one
-		 * second. Otherwise the other mountpoints may get
-		 * excessively backlogged.
+		 * second. This gives the syncer thread the opportunity
+		 * to pause if appropriate.
 		 */
 		if (!full && starttime != time_second)
 			break;
 	}
 	if (full == 0)
 		journal_unsuspend(ump);
-	FREE_LOCK(&lk);
+	FREE_LOCK(ump);
 	return (matchcnt);
 }
 
@@ -1630,12 +1557,13 @@ process_removes(vp)
 {
 	struct inodedep *inodedep;
 	struct dirrem *dirrem;
+	struct ufsmount *ump;
 	struct mount *mp;
 	ino_t inum;
 
-	rw_assert(&lk, RA_WLOCKED);
-
 	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+	LOCK_OWNED(ump);
 	inum = VTOI(vp)->i_number;
 	for (;;) {
 top:
@@ -1658,12 +1586,12 @@ top:
 		if (dirrem == NULL)
 			return;
 		remove_from_worklist(&dirrem->dm_list);
-		FREE_LOCK(&lk);
+		FREE_LOCK(ump);
 		if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
 			panic("process_removes: suspended filesystem");
 		handle_workitem_remove(dirrem, 0);
 		vn_finished_secondary_write(mp);
-		ACQUIRE_LOCK(&lk);
+		ACQUIRE_LOCK(ump);
 	}
 }
 
@@ -1679,13 +1607,14 @@ process_truncates(vp)
 {
 	struct inodedep *inodedep;
 	struct freeblks *freeblks;
+	struct ufsmount *ump;
 	struct mount *mp;
 	ino_t inum;
 	int cgwait;
 
-	rw_assert(&lk, RA_WLOCKED);
-
 	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+	LOCK_OWNED(ump);
 	inum = VTOI(vp)->i_number;
 	for (;;) {
 		if (inodedep_lookup(mp, inum, 0, &inodedep) == 0)
@@ -1706,33 +1635,33 @@ process_truncates(vp)
 			}
 			/* Freeblks is waiting on a inode write. */
 			if ((freeblks->fb_state & COMPLETE) == 0) {
-				FREE_LOCK(&lk);
+				FREE_LOCK(ump);
 				ffs_update(vp, 1);
-				ACQUIRE_LOCK(&lk);
+				ACQUIRE_LOCK(ump);
 				break;
 			}
 			if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) ==
 			    (ALLCOMPLETE | ONWORKLIST)) {
 				remove_from_worklist(&freeblks->fb_list);
 				freeblks->fb_state |= INPROGRESS;
-				FREE_LOCK(&lk);
+				FREE_LOCK(ump);
 				if (vn_start_secondary_write(NULL, &mp,
 				    V_NOWAIT))
 					panic("process_truncates: "
 					    "suspended filesystem");
 				handle_workitem_freeblocks(freeblks, 0);
 				vn_finished_secondary_write(mp);
-				ACQUIRE_LOCK(&lk);
+				ACQUIRE_LOCK(ump);
 				break;
 			}
 			if (freeblks->fb_cgwait)
 				cgwait++;
 		}
 		if (cgwait) {
-			FREE_LOCK(&lk);
+			FREE_LOCK(ump);
 			sync_cgs(mp, MNT_WAIT);
 			ffs_sync_snap(mp, MNT_WAIT);
-			ACQUIRE_LOCK(&lk);
+			ACQUIRE_LOCK(ump);
 			continue;
 		}
 		if (freeblks == NULL)
@@ -1756,7 +1685,6 @@ process_worklist_item(mp, target, flags)
 	int matchcnt;
 	int error;
 
-	rw_assert(&lk, RA_WLOCKED);
 	KASSERT(mp != NULL, ("process_worklist_item: NULL mp"));
 	/*
 	 * If we are being called because of a process doing a
@@ -1767,6 +1695,7 @@ process_worklist_item(mp, target, flags)
 		return (-1);
 	PHOLD(curproc);	/* Don't let the stack go away. */
 	ump = VFSTOUFS(mp);
+	LOCK_OWNED(ump);
 	matchcnt = 0;
 	sentinel.wk_mp = NULL;
 	sentinel.wk_type = D_SENTINEL;
@@ -1783,7 +1712,7 @@ process_worklist_item(mp, target, flags)
 			    wk);
 		wk->wk_state |= INPROGRESS;
 		remove_from_worklist(wk);
-		FREE_LOCK(&lk);
+		FREE_LOCK(ump);
 		if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
 			panic("process_worklist_item: suspended filesystem");
 		switch (wk->wk_type) {
@@ -1816,7 +1745,7 @@ process_worklist_item(mp, target, flags)
 			/* NOTREACHED */
 		}
 		vn_finished_secondary_write(mp);
-		ACQUIRE_LOCK(&lk);
+		ACQUIRE_LOCK(ump);
 		if (error == 0) {
 			if (++matchcnt == target)
 				break;
@@ -1850,6 +1779,7 @@ softdep_move_dependencies(oldbp, newbp)
 	struct buf *newbp;
 {
 	struct worklist *wk, *wktail;
+	struct ufsmount *ump;
 	int dirty;
 
 	if ((wk = LIST_FIRST(&oldbp->b_dep)) == NULL)
@@ -1858,7 +1788,8 @@ softdep_move_dependencies(oldbp, newbp)
 	    ("softdep_move_dependencies called on non-softdep filesystem"));
 	dirty = 0;
 	wktail = NULL;
-	ACQUIRE_LOCK(&lk);
+	ump = VFSTOUFS(wk->wk_mp);
+	ACQUIRE_LOCK(ump);
 	while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
 		LIST_REMOVE(wk, wk_list);
 		if (wk->wk_type == D_BMSAFEMAP &&
@@ -1870,7 +1801,7 @@ softdep_move_dependencies(oldbp, newbp)
 			LIST_INSERT_AFTER(wktail, wk, wk_list);
 		wktail = wk;
 	}
-	FREE_LOCK(&lk);
+	FREE_LOCK(ump);
 
 	return (dirty);
 }
@@ -1916,15 +1847,15 @@ softdep_waitidle(struct mount *mp)
 	int i;
 
 	ump = VFSTOUFS(mp);
-	ACQUIRE_LOCK(&lk);
+	ACQUIRE_LOCK(ump);
 	for (i = 0; i < 10 && ump->softdep_deps; i++) {
 		ump->softdep_req = 1;
 		if (ump->softdep_on_worklist)
 			panic("softdep_waitidle: work added after flush.");
-		msleep(&ump->softdep_deps, &lk, PVM, "softdeps", 1);
+		msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM, "softdeps", 1);
 	}
 	ump->softdep_req = 0;
-	FREE_LOCK(&lk);
+	FREE_LOCK(ump);
 	error = 0;
 	if (i == 10) {
 		error = EBUSY;
@@ -2023,12 +1954,14 @@ retry_flush:
 /*
  * Structure hashing.
  * 
- * There are three types of structures that can be looked up:
+ * There are four types of structures that can be looked up:
  *	1) pagedep structures identified by mount point, inode number,
  *	   and logical block.
  *	2) inodedep structures identified by mount point and inode number.
  *	3) newblk structures identified by mount point and
  *	   physical block number.
+ *	4) bmsafemap structures identified by mount point and
+ *	   cylinder group number.
  *
  * The "pagedep" and "inodedep" dependency structures are hashed
  * separately from the file blocks and inodes to which they correspond.
@@ -2040,7 +1973,8 @@ retry_flush:
  * their allocdirect or allocindir structure.
  *
  * The lookup routines optionally create and hash a new instance when
- * an existing entry is not found.
+ * an existing entry is not found. The bmsafemap lookup routine always
+ * allocates a new structure if an existing one is not found.
  */
 #define DEPALLOC	0x0001	/* allocate structure if lookup fails */
 #define NODELAY		0x0002	/* cannot do background work */
@@ -2048,26 +1982,20 @@ retry_flush:
 /*
  * Structures and routines associated with pagedep caching.
  */
-LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
-u_long	pagedep_hash;		/* size of hash table - 1 */
-#define	PAGEDEP_HASH(mp, inum, lbn) \
-	(&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
-	    pagedep_hash])
+#define	PAGEDEP_HASH(ump, inum, lbn) \
+	(&(ump)->pagedep_hashtbl[((inum) + (lbn)) & (ump)->pagedep_hash_size])
 
 static int
-pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp)
+pagedep_find(pagedephd, ino, lbn, pagedeppp)
 	struct pagedep_hashhead *pagedephd;
 	ino_t ino;
 	ufs_lbn_t lbn;
-	struct mount *mp;
-	int flags;
 	struct pagedep **pagedeppp;
 {
 	struct pagedep *pagedep;
 
 	LIST_FOREACH(pagedep, pagedephd, pd_hash) {
-		if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn &&
-		    mp == pagedep->pd_list.wk_mp) {
+		if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn) {
 			*pagedeppp = pagedep;
 			return (1);
 		}
@@ -2093,10 +2021,12 @@ pagedep_lookup(mp, bp, ino, lbn, flags, 
 	struct pagedep *pagedep;
 	struct pagedep_hashhead *pagedephd;
 	struct worklist *wk;
+	struct ufsmount *ump;
 	int ret;
 	int i;
 
-	rw_assert(&lk, RA_WLOCKED);
+	ump = VFSTOUFS(mp);
+	LOCK_OWNED(ump);
 	if (bp) {
 		LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 			if (wk->wk_type == D_PAGEDEP) {
@@ -2105,8 +2035,8 @@ pagedep_lookup(mp, bp, ino, lbn, flags, 
 			}
 		}
 	}
-	pagedephd = PAGEDEP_HASH(mp, ino, lbn);
-	ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+	pagedephd = PAGEDEP_HASH(ump, ino, lbn);
+	ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
 	if (ret) {
 		if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp)
 			WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list);
@@ -2114,12 +2044,12 @@ pagedep_lookup(mp, bp, ino, lbn, flags, 
 	}
 	if ((flags & DEPALLOC) == 0)
 		return (0);
-	FREE_LOCK(&lk);
+	FREE_LOCK(ump);
 	pagedep = malloc(sizeof(struct pagedep),
 	    M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO);
 	workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp);
-	ACQUIRE_LOCK(&lk);
-	ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+	ACQUIRE_LOCK(ump);
+	ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
 	if (*pagedeppp) {
 		/*
 		 * This should never happen since we only create pagedeps
@@ -2143,22 +2073,19 @@ pagedep_lookup(mp, bp, ino, lbn, flags, 
 /*
  * Structures and routines associated with inodedep caching.
  */
-LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
-static u_long	inodedep_hash;	/* size of hash table - 1 */
-#define	INODEDEP_HASH(fs, inum) \
-      (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash])
+#define	INODEDEP_HASH(ump, inum) \
+      (&(ump)->inodedep_hashtbl[(inum) & (ump)->inodedep_hash_size])
 
 static int
-inodedep_find(inodedephd, fs, inum, inodedeppp)
+inodedep_find(inodedephd, inum, inodedeppp)
 	struct inodedep_hashhead *inodedephd;
-	struct fs *fs;
 	ino_t inum;
 	struct inodedep **inodedeppp;
 {
 	struct inodedep *inodedep;
 
 	LIST_FOREACH(inodedep, inodedephd, id_hash)
-		if (inum == inodedep->id_ino && fs == inodedep->id_fs)
+		if (inum == inodedep->id_ino)
 			break;
 	if (inodedep) {
 		*inodedeppp = inodedep;
@@ -2183,13 +2110,15 @@ inodedep_lookup(mp, inum, flags, inodede
 {
 	struct inodedep *inodedep;
 	struct inodedep_hashhead *inodedephd;
+	struct ufsmount *ump;
 	struct fs *fs;
 
-	rw_assert(&lk, RA_WLOCKED);
-	fs = VFSTOUFS(mp)->um_fs;
-	inodedephd = INODEDEP_HASH(fs, inum);
+	ump = VFSTOUFS(mp);
+	LOCK_OWNED(ump);
+	fs = ump->um_fs;
+	inodedephd = INODEDEP_HASH(ump, inum);
 
-	if (inodedep_find(inodedephd, fs, inum, inodedeppp))
+	if (inodedep_find(inodedephd, inum, inodedeppp))
 		return (1);
 	if ((flags & DEPALLOC) == 0)
 		return (0);
@@ -2198,12 +2127,12 @@ inodedep_lookup(mp, inum, flags, inodede
 	 */
 	if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0)
 		request_cleanup(mp, FLUSH_INODES);
-	FREE_LOCK(&lk);
+	FREE_LOCK(ump);
 	inodedep = malloc(sizeof(struct inodedep),
 		M_INODEDEP, M_SOFTDEP_FLAGS);
 	workitem_alloc(&inodedep->id_list, D_INODEDEP, mp);
-	ACQUIRE_LOCK(&lk);
-	if (inodedep_find(inodedephd, fs, inum, inodedeppp)) {
+	ACQUIRE_LOCK(ump);
+	if (inodedep_find(inodedephd, inum, inodedeppp)) {
 		WORKITEM_FREE(inodedep, D_INODEDEP);
 		return (1);
 	}
@@ -2235,15 +2164,12 @@ inodedep_lookup(mp, inum, flags, inodede
 /*
  * Structures and routines associated with newblk caching.
  */
-LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
-u_long	newblk_hash;		/* size of hash table - 1 */
-#define	NEWBLK_HASH(fs, inum) \
-	(&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
+#define	NEWBLK_HASH(ump, inum) \
+	(&(ump)->newblk_hashtbl[(inum) & (ump)->newblk_hash_size])
 
 static int
-newblk_find(newblkhd, mp, newblkno, flags, newblkpp)
+newblk_find(newblkhd, newblkno, flags, newblkpp)
 	struct newblk_hashhead *newblkhd;
-	struct mount *mp;
 	ufs2_daddr_t newblkno;
 	int flags;
 	struct newblk **newblkpp;
@@ -2253,8 +2179,6 @@ newblk_find(newblkhd, mp, newblkno, flag
 	LIST_FOREACH(newblk, newblkhd, nb_hash) {
 		if (newblkno != newblk->nb_newblkno)
 			continue;
-		if (mp != newblk->nb_list.wk_mp)
-			continue;
 		/*
 		 * If we're creating a new dependency don't match those that
 		 * have already been converted to allocdirects.  This is for
@@ -2286,18 +2210,21 @@ newblk_lookup(mp, newblkno, flags, newbl
 {
 	struct newblk *newblk;
 	struct newblk_hashhead *newblkhd;
+	struct ufsmount *ump;
 
-	newblkhd = NEWBLK_HASH(VFSTOUFS(mp)->um_fs, newblkno);
-	if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp))
+	ump = VFSTOUFS(mp);
+	LOCK_OWNED(ump);
+	newblkhd = NEWBLK_HASH(ump, newblkno);
+	if (newblk_find(newblkhd, newblkno, flags, newblkpp))
 		return (1);
 	if ((flags & DEPALLOC) == 0)
 		return (0);
-	FREE_LOCK(&lk);
+	FREE_LOCK(ump);
 	newblk = malloc(sizeof(union allblk), M_NEWBLK,
 	    M_SOFTDEP_FLAGS | M_ZERO);
 	workitem_alloc(&newblk->nb_list, D_NEWBLK, mp);
-	ACQUIRE_LOCK(&lk);
-	if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) {
+	ACQUIRE_LOCK(ump);
+	if (newblk_find(newblkhd, newblkno, flags, newblkpp)) {
 		WORKITEM_FREE(newblk, D_NEWBLK);
 		return (1);
 	}
@@ -2315,10 +2242,8 @@ newblk_lookup(mp, newblkno, flags, newbl
 /*
  * Structures and routines associated with freed indirect block caching.
  */
-struct freeworklst *indir_hashtbl;
-u_long	indir_hash;		/* size of hash table - 1 */
-#define	INDIR_HASH(mp, blkno) \
-	(&indir_hashtbl[((((register_t)(mp)) >> 13) + (blkno)) & indir_hash])
+#define	INDIR_HASH(ump, blkno) \
+	(&(ump)->indir_hashtbl[(blkno) & (ump)->indir_hash_size])
 
 /*
  * Lookup an indirect block in the indir hash table.  The freework is
@@ -2331,14 +2256,14 @@ indirblk_lookup(mp, blkno)
 	ufs2_daddr_t blkno;
 {
 	struct freework *freework;
-	struct freeworklst *wkhd;
+	struct indir_hashhead *wkhd;
+	struct ufsmount *ump;
 
-	wkhd = INDIR_HASH(mp, blkno);
+	ump = VFSTOUFS(mp);
+	wkhd = INDIR_HASH(ump, blkno);
 	TAILQ_FOREACH(freework, wkhd, fw_next) {
 		if (freework->fw_blkno != blkno)
 			continue;
-		if (freework->fw_list.wk_mp != mp)
-			continue;
 		indirblk_remove(freework);
 		return (1);
 	}
@@ -2356,15 +2281,17 @@ indirblk_insert(freework)
 {
 	struct jblocks *jblocks;
 	struct jseg *jseg;
+	struct ufsmount *ump;
 
-	jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks;
+	ump = VFSTOUFS(freework->fw_list.wk_mp);
+	jblocks = ump->softdep_jblocks;
 	jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst);
 	if (jseg == NULL)
 		return;
 	
 	LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs);
-	TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp,
-	    freework->fw_blkno), freework, fw_next);
+	TAILQ_INSERT_HEAD(INDIR_HASH(ump, freework->fw_blkno), freework,
+	    fw_next);
 	freework->fw_state &= ~DEPCOMPLETE;
 }
 
@@ -2372,10 +2299,11 @@ static void
 indirblk_remove(freework)
 	struct freework *freework;
 {
+	struct ufsmount *ump;
 
+	ump = VFSTOUFS(freework->fw_list.wk_mp);
 	LIST_REMOVE(freework, fw_segs);
-	TAILQ_REMOVE(INDIR_HASH(freework->fw_list.wk_mp,
-	    freework->fw_blkno), freework, fw_next);
+	TAILQ_REMOVE(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next);
 	freework->fw_state |= DEPCOMPLETE;
 	if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
 		WORKITEM_FREE(freework, D_FREEWORK);
@@ -2388,20 +2316,8 @@ indirblk_remove(freework)
 void 
 softdep_initialize()
 {
-	int i;
 
-	LIST_INIT(&mkdirlisthd);
 	max_softdeps = desiredvnodes * 4;
-	pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash);
-	inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash);
-	newblk_hashtbl = hashinit(max_softdeps / 2,  M_NEWBLK, &newblk_hash);
-	bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash);
-	i = 1 << (ffs(desiredvnodes / 10) - 1);
-	indir_hashtbl = malloc(i * sizeof(indir_hashtbl[0]), M_FREEWORK,
-	    M_WAITOK);
-	indir_hash = i - 1;
-	for (i = 0; i <= indir_hash; i++)
-		TAILQ_INIT(&indir_hashtbl[i]);
 
 	/* initialise bioops hack */
 	bioops.io_start = softdep_disk_io_initiation;
@@ -2421,12 +2337,13 @@ void
 softdep_uninitialize()
 {
 
+	/* clear bioops hack */
+	bioops.io_start = NULL;
+	bioops.io_complete = NULL;
+	bioops.io_deallocate = NULL;
+	bioops.io_countdeps = NULL;
+
 	callout_drain(&softdep_callout);
-	hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash);
-	hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash);
-	hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash);
-	hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash);
-	free(indir_hashtbl, M_FREEWORK);
 }
 
 /*
@@ -2441,19 +2358,24 @@ softdep_mount(devvp, mp, fs, cred)
 	struct ucred *cred;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201310210028.r9L0S2DX081161>