From owner-svn-src-head@FreeBSD.ORG Mon Aug 4 22:03:59 2014 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 7CB17D4B for ; Mon, 4 Aug 2014 22:03:59 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1 with cipher ECDHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 6A31621FF for ; Mon, 4 Aug 2014 22:03:59 +0000 (UTC) Received: from mckusick (uid 740) (envelope-from mckusick@FreeBSD.org) id 554f by svn.freebsd.org (DragonFly Mail Agent v0.9+); Mon, 04 Aug 2014 22:03:58 +0000 From: Kirk McKusick Date: Mon, 4 Aug 2014 22:03:58 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r269533 - in head/sys: kern ufs/ffs X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Message-Id: <53e0034f.554f.4192a68b@svn.freebsd.org> X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.18 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 04 Aug 2014 22:03:59 -0000 Author: mckusick Date: Mon Aug 4 22:03:58 2014 New Revision: 269533 URL: http://svnweb.freebsd.org/changeset/base/269533 Log: Add support for multi-threading of soft updates. Replace a single soft updates thread with a thread per FFS-filesystem mount point. The threads are associated with the bufdaemon process. 
Reviewed by: kib Tested by: Peter Holm and Scott Long MFC after: 2 weeks Sponsored by: Netflix Modified: head/sys/kern/vfs_bio.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/kern/vfs_bio.c ============================================================================== --- head/sys/kern/vfs_bio.c Mon Aug 4 21:41:01 2014 (r269532) +++ head/sys/kern/vfs_bio.c Mon Aug 4 22:03:58 2014 (r269533) @@ -98,7 +98,8 @@ struct buf_ops buf_ops_bio = { struct buf *buf; /* buffer header pool */ caddr_t unmapped_buf; -static struct proc *bufdaemonproc; +/* Used below and for softdep flushing threads in ufs/ffs/ffs_softdep.c */ +struct proc *bufdaemonproc; static int inmem(struct vnode *vp, daddr_t blkno); static void vm_hold_free_pages(struct buf *bp, int newbsize); Modified: head/sys/ufs/ffs/ffs_softdep.c ============================================================================== --- head/sys/ufs/ffs/ffs_softdep.c Mon Aug 4 21:41:01 2014 (r269532) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Aug 4 22:03:58 2014 (r269533) @@ -908,9 +908,9 @@ static void add_to_worklist(struct workl static void wake_worklist(struct worklist *); static void wait_worklist(struct worklist *, char *); static void remove_from_worklist(struct worklist *); -static void softdep_flush(void); +static void softdep_flush(void *); static void softdep_flushjournal(struct mount *); -static int softdep_speedup(void); +static int softdep_speedup(struct ufsmount *); static void worklist_speedup(struct mount *); static int journal_mount(struct mount *, struct fs *, struct ucred *); static void journal_unmount(struct ufsmount *); @@ -962,18 +962,21 @@ static int softdep_count_dependencies(st /* * Global lock over all of soft updates. 
*/ -static struct rwlock lk; -RW_SYSINIT(softdep_lock, &lk, "Softdep Lock"); +static struct mtx lk; +MTX_SYSINIT(softdep_lock, &lk, "Global Softdep Lock", MTX_DEF); + +#define ACQUIRE_GBLLOCK(lk) mtx_lock(lk) +#define FREE_GBLLOCK(lk) mtx_unlock(lk) +#define GBLLOCK_OWNED(lk) mtx_assert((lk), MA_OWNED) /* - * Allow per-filesystem soft-updates locking. - * For now all use the same global lock defined above. + * Per-filesystem soft-updates locking. */ -#define LOCK_PTR(ump) ((ump)->um_softdep->sd_fslock) -#define TRY_ACQUIRE_LOCK(ump) rw_try_wlock((ump)->um_softdep->sd_fslock) -#define ACQUIRE_LOCK(ump) rw_wlock((ump)->um_softdep->sd_fslock) -#define FREE_LOCK(ump) rw_wunlock((ump)->um_softdep->sd_fslock) -#define LOCK_OWNED(ump) rw_assert((ump)->um_softdep->sd_fslock, \ +#define LOCK_PTR(ump) (&(ump)->um_softdep->sd_fslock) +#define TRY_ACQUIRE_LOCK(ump) rw_try_wlock(&(ump)->um_softdep->sd_fslock) +#define ACQUIRE_LOCK(ump) rw_wlock(&(ump)->um_softdep->sd_fslock) +#define FREE_LOCK(ump) rw_wunlock(&(ump)->um_softdep->sd_fslock) +#define LOCK_OWNED(ump) rw_assert(&(ump)->um_softdep->sd_fslock, \ RA_WLOCKED) #define BUF_AREC(bp) lockallowrecurse(&(bp)->b_lock) @@ -1178,7 +1181,7 @@ workitem_free(item, type) KASSERT(ump->softdep_curdeps[item->wk_type] > 0, ("workitem_free: %s: softdep_curdeps[%s] going negative", ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); - dep_current[item->wk_type]--; + atomic_subtract_long(&dep_current[item->wk_type], 1); ump->softdep_curdeps[item->wk_type] -= 1; free(item, DtoM(type)); } @@ -1196,11 +1199,13 @@ workitem_alloc(item, type, mp) item->wk_state = 0; ump = VFSTOUFS(mp); - ACQUIRE_LOCK(ump); + ACQUIRE_GBLLOCK(&lk); dep_current[type]++; if (dep_current[type] > dep_highuse[type]) dep_highuse[type] = dep_current[type]; dep_total[type]++; + FREE_GBLLOCK(&lk); + ACQUIRE_LOCK(ump); ump->softdep_curdeps[type] += 1; ump->softdep_deps++; ump->softdep_accdeps++; @@ -1224,11 +1229,13 @@ workitem_reassign(item, newtype) 
KASSERT(dep_current[item->wk_type] > 0, ("workitem_reassign: %s: dep_current[%s] going negative", VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type))); - dep_current[item->wk_type]--; + ACQUIRE_GBLLOCK(&lk); dep_current[newtype]++; + dep_current[item->wk_type]--; if (dep_current[newtype] > dep_highuse[newtype]) dep_highuse[newtype] = dep_current[newtype]; dep_total[newtype]++; + FREE_GBLLOCK(&lk); item->wk_type = newtype; } @@ -1236,13 +1243,10 @@ workitem_reassign(item, newtype) * Workitem queue management */ static int max_softdeps; /* maximum number of structs before slowdown */ -static int maxindirdeps = 50; /* max number of indirdeps before slowdown */ static int tickdelay = 2; /* number of ticks to pause during slowdown */ static int proc_waiting; /* tracks whether we have a timeout posted */ static int *stat_countp; /* statistic to count in proc_waiting timeout */ static struct callout softdep_callout; -static struct mount *req_pending; -#define ALLCLEAN ((struct mount *)-1) static int req_clear_inodedeps; /* syncer process flush some inodedeps */ static int req_clear_remove; /* syncer process flush some freeblks */ static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? 
*/ @@ -1250,7 +1254,7 @@ static int softdep_flushcache = 0; /* Sh /* * runtime statistics */ -static int stat_softdep_mounts; /* number of softdep mounted filesystems */ +static int stat_flush_threads; /* number of softdep flushing threads */ static int stat_worklist_push; /* number of worklist cleanups */ static int stat_blk_limit_push; /* number of times block limit neared */ static int stat_ino_limit_push; /* number of times inode limit neared */ @@ -1281,10 +1285,8 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, max &max_softdeps, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); -SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW, - &maxindirdeps, 0, ""); -SYSCTL_INT(_debug_softdep, OID_AUTO, softdep_mounts, CTLFLAG_RD, - &stat_softdep_mounts, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, flush_threads, CTLFLAG_RD, + &stat_flush_threads, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW, &stat_worklist_push, 0,""); SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW, @@ -1344,53 +1346,67 @@ SYSCTL_DECL(_vfs_ffs); static int compute_summary_at_mount = 0; SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW, &compute_summary_at_mount, 0, "Recompute summary at mount"); -static struct proc *softdepproc; -static struct kproc_desc softdep_kp = { - "softdepflush", - softdep_flush, - &softdepproc -}; -SYSINIT(sdproc, SI_SUB_KTHREAD_UPDATE, SI_ORDER_ANY, kproc_start, - &softdep_kp); - +static int print_threads = 0; +SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW, + &print_threads, 0, "Notify flusher thread start/stop"); + +/* List of all filesystems mounted with soft updates */ +static TAILQ_HEAD(, mount_softdeps) softdepmounts; + +/* + * This function cleans the worklist for a filesystem. + * Each filesystem running with soft dependencies gets its own + * thread to run in this function. The thread is started up in + * softdep_mount and shutdown in softdep_unmount. 
They show up + * as part of the kernel "bufdaemon" process whose process + * entry is available in bufdaemonproc. + */ +static int searchfailed; +extern struct proc *bufdaemonproc; static void -softdep_flush(void) +softdep_flush(addr) + void *addr; { - struct mount *nmp; struct mount *mp; - struct ufsmount *ump; struct thread *td; - int remaining; - int progress; + struct ufsmount *ump; td = curthread; td->td_pflags |= TDP_NORUNNINGBUF; - + mp = (struct mount *)addr; + ump = VFSTOUFS(mp); + atomic_add_int(&stat_flush_threads, 1); + if (print_threads) { + if (stat_flush_threads == 1) + printf("Running %s at pid %d\n", bufdaemonproc->p_comm, + bufdaemonproc->p_pid); + printf("Start thread %s\n", td->td_name); + } for (;;) { - kproc_suspend_check(softdepproc); - remaining = progress = 0; - mtx_lock(&mountlist_mtx); - for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { - nmp = TAILQ_NEXT(mp, mnt_list); - if (MOUNTEDSOFTDEP(mp) == 0) - continue; - if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) - continue; - ump = VFSTOUFS(mp); - progress += softdep_process_worklist(mp, 0); - remaining += ump->softdep_on_worklist; - mtx_lock(&mountlist_mtx); - nmp = TAILQ_NEXT(mp, mnt_list); - vfs_unbusy(mp); - } - mtx_unlock(&mountlist_mtx); - if (remaining && progress) + while (softdep_process_worklist(mp, 0) > 0 || + (MOUNTEDSUJ(mp) && + VFSTOUFS(mp)->softdep_jblocks->jb_suspended)) + kthread_suspend_check(); + ACQUIRE_LOCK(ump); + if ((ump->softdep_flags & FLUSH_CLEANUP) == 0) + msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM, + "sdflush", hz / 2); + ump->softdep_flags &= ~FLUSH_CLEANUP; + /* + * Check to see if we are done and need to exit. 
+ */ + if ((ump->softdep_flags & FLUSH_EXIT) == 0) { + FREE_LOCK(ump); continue; - rw_wlock(&lk); - if (req_pending == NULL) - msleep(&req_pending, &lk, PVM, "sdflush", hz); - req_pending = NULL; - rw_wunlock(&lk); + } + ump->softdep_flags &= ~FLUSH_EXIT; + FREE_LOCK(ump); + wakeup(&ump->softdep_flags); + if (print_threads) + printf("Stop thread %s: searchfailed %d, did cleanups %d\n", td->td_name, searchfailed, ump->um_softdep->sd_cleanups); + atomic_subtract_int(&stat_flush_threads, 1); + kthread_exit(); + panic("kthread_exit failed\n"); } } @@ -1398,19 +1414,70 @@ static void worklist_speedup(mp) struct mount *mp; { - rw_assert(&lk, RA_WLOCKED); - if (req_pending == 0) { - req_pending = mp; - wakeup(&req_pending); + struct ufsmount *ump; + + ump = VFSTOUFS(mp); + LOCK_OWNED(ump); + if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) { + ump->softdep_flags |= FLUSH_CLEANUP; + if (ump->softdep_flushtd->td_wchan == &ump->softdep_flushtd) + wakeup(&ump->softdep_flushtd); } } static int -softdep_speedup(void) +softdep_speedup(ump) + struct ufsmount *ump; { + struct ufsmount *altump; + struct mount_softdeps *sdp; - worklist_speedup(ALLCLEAN); + LOCK_OWNED(ump); + worklist_speedup(ump->um_mountp); bd_speedup(); + /* + * If we have global shortages, then we need other + * filesystems to help with the cleanup. Here we wakeup a + * flusher thread for a filesystem that is over its fair + * share of resources. 
+ */ + if (req_clear_inodedeps || req_clear_remove) { + ACQUIRE_GBLLOCK(&lk); + TAILQ_FOREACH(sdp, &softdepmounts, sd_next) { + if ((altump = sdp->sd_ump) == ump) + continue; + if (((req_clear_inodedeps && + altump->softdep_curdeps[D_INODEDEP] > + max_softdeps / stat_flush_threads) || + (req_clear_remove && + altump->softdep_curdeps[D_DIRREM] > + (max_softdeps / 2) / stat_flush_threads)) && + TRY_ACQUIRE_LOCK(altump)) + break; + } + if (sdp == NULL) { + searchfailed++; + FREE_GBLLOCK(&lk); + } else { + /* + * Move to the end of the list so we pick a + * different one on our next try. + */ + TAILQ_REMOVE(&softdepmounts, sdp, sd_next); + TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next); + FREE_GBLLOCK(&lk); + if ((altump->softdep_flags & + (FLUSH_CLEANUP | FLUSH_EXIT)) == 0) { + altump->softdep_flags |= FLUSH_CLEANUP; + altump->um_softdep->sd_cleanups++; + if (altump->softdep_flushtd->td_wchan == + &altump->softdep_flushtd) { + wakeup(&altump->softdep_flushtd); + } + } + FREE_LOCK(altump); + } + } return (speedup_syncer()); } @@ -2126,9 +2193,14 @@ inodedep_lookup(mp, inum, flags, inodede if ((flags & DEPALLOC) == 0) return (0); /* - * If we are over our limit, try to improve the situation. - */ - if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0) + * If the system is over its limit and our filesystem is + * responsible for more than our share of that usage and + * we are not in a rush, request some inodedep cleanup. 
+ */ + while (dep_current[D_INODEDEP] > max_softdeps && + (flags & NODELAY) == 0 && + ump->softdep_curdeps[D_INODEDEP] > + max_softdeps / stat_flush_threads) request_cleanup(mp, FLUSH_INODES); FREE_LOCK(ump); inodedep = malloc(sizeof(struct inodedep), @@ -2320,6 +2392,7 @@ void softdep_initialize() { + TAILQ_INIT(&softdepmounts); max_softdeps = desiredvnodes * 4; /* initialise bioops hack */ @@ -2378,7 +2451,9 @@ softdep_mount(devvp, mp, fs, cred) ump = VFSTOUFS(mp); ump->um_softdep = sdp; MNT_IUNLOCK(mp); - LOCK_PTR(ump) = &lk; + rw_init(LOCK_PTR(ump), "Per-Filesystem Softdep Lock"); + TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next); + sdp->sd_ump = ump; LIST_INIT(&ump->softdep_workitem_pending); LIST_INIT(&ump->softdep_journal_pending); TAILQ_INIT(&ump->softdep_unlinked); @@ -2409,7 +2484,12 @@ softdep_mount(devvp, mp, fs, cred) softdep_unmount(mp); return (error); } - atomic_add_int(&stat_softdep_mounts, 1); + /* + * Start our flushing thread in the bufdaemon process. + */ + kproc_kthread_add(&softdep_flush, mp, &bufdaemonproc, + &ump->softdep_flushtd, 0, 0, "softdepflush", "%s worker", + mp->mnt_stat.f_mntonname); /* * When doing soft updates, the counters in the * superblock may have gotten out of sync. Recomputation @@ -2465,7 +2545,24 @@ softdep_unmount(mp) MNT_IUNLOCK(mp); journal_unmount(ump); } - atomic_subtract_int(&stat_softdep_mounts, 1); + /* + * Shut down our flushing thread. The check for NULL is needed in case + * softdep_mount errors out before the thread has been created. + */ + if (ump->softdep_flushtd != NULL) { + ACQUIRE_LOCK(ump); + ump->softdep_flags |= FLUSH_EXIT; + wakeup(&ump->softdep_flushtd); + msleep(&ump->softdep_flags, LOCK_PTR(ump), PVM | PDROP, + "sdwait", 0); + KASSERT((ump->softdep_flags & FLUSH_EXIT) == 0, + ("Thread shutdown failed")); + } + /* + * Free up our resources. 
+ */ + rw_destroy(LOCK_PTR(ump)); + TAILQ_REMOVE(&softdepmounts, ump->um_softdep, sd_next); hashdestroy(ump->pagedep_hashtbl, M_PAGEDEP, ump->pagedep_hash_size); hashdestroy(ump->inodedep_hashtbl, M_INODEDEP, ump->inodedep_hash_size); hashdestroy(ump->newblk_hashtbl, M_NEWBLK, ump->newblk_hash_size); @@ -2788,7 +2885,7 @@ journal_space(ump, thresh) */ limit = (max_softdeps / 10) * 9; if (dep_current[D_INODEDEP] > limit && - ump->softdep_curdeps[D_INODEDEP] > limit / stat_softdep_mounts) + ump->softdep_curdeps[D_INODEDEP] > limit / stat_flush_threads) return (0); if (thresh) thresh = jblocks->jb_min; @@ -2813,7 +2910,7 @@ journal_suspend(ump) if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) { stat_journal_min++; mp->mnt_kern_flag |= MNTK_SUSPEND; - mp->mnt_susp_owner = FIRST_THREAD_IN_PROC(softdepproc); + mp->mnt_susp_owner = ump->softdep_flushtd; } jblocks->jb_suspended = 1; MNT_IUNLOCK(mp); @@ -2888,7 +2985,7 @@ softdep_prealloc(vp, waitok) process_removes(vp); process_truncates(vp); if (journal_space(ump, 0) == 0) { - softdep_speedup(); + softdep_speedup(ump); if (journal_space(ump, 1) == 0) journal_suspend(ump); } @@ -2932,10 +3029,10 @@ softdep_prelink(dvp, vp) } process_removes(dvp); process_truncates(dvp); - softdep_speedup(); + softdep_speedup(ump); process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT); if (journal_space(ump, 0) == 0) { - softdep_speedup(); + softdep_speedup(ump); if (journal_space(ump, 1) == 0) journal_suspend(ump); } @@ -3257,7 +3354,7 @@ softdep_process_journal(mp, needwk, flag if (flags != MNT_WAIT) break; printf("softdep: Out of journal space!\n"); - softdep_speedup(); + softdep_speedup(ump); msleep(jblocks, LOCK_PTR(ump), PRIBIO, "jblocks", hz); } FREE_LOCK(ump); @@ -3970,7 +4067,7 @@ free_freedep(freedep) /* * Allocate a new freework structure that may be a level in an indirect * when parent is not NULL or a top level block when it is. 
The top level - * freework structures are allocated without the soft updates lock held + * freework structures are allocated without the per-filesystem lock held * and before the freeblks is visible outside of softdep_setup_freeblocks(). */ static struct freework * @@ -4039,7 +4136,7 @@ cancel_jfreeblk(freeblks, blkno) /* * Allocate a new jfreeblk to journal top level block pointer when truncating - * a file. The caller must add this to the worklist when the soft updates + * a file. The caller must add this to the worklist when the per-filesystem * lock is held. */ static struct jfreeblk * @@ -7419,7 +7516,7 @@ softdep_freefile(pvp, ino, mode) clear_unlinked_inodedep(inodedep); /* * Re-acquire inodedep as we've dropped the - * soft updates lock in clear_unlinked_inodedep(). + * per-filesystem lock in clear_unlinked_inodedep(). */ inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); } @@ -7965,10 +8062,8 @@ indir_trunc(freework, dbn, lbn) * If we're goingaway, free the indirdep. Otherwise it will * linger until the write completes. */ - if (goingaway) { + if (goingaway) free_indirdep(indirdep); - ump->softdep_numindirdeps -= 1; - } } FREE_LOCK(ump); /* Initialize pointers depending on block size. */ @@ -8140,7 +8235,7 @@ cancel_allocindir(aip, bp, freeblks, tru * Create the mkdir dependencies for . and .. in a new directory. Link them * in to a newdirblk so any subsequent additions are tracked properly. The * caller is responsible for adding the mkdir1 dependency to the journal - * and updating id_mkdiradd. This function returns with the soft updates + * and updating id_mkdiradd. This function returns with the per-filesystem * lock held. */ static struct mkdir * @@ -8958,12 +9053,16 @@ newdirrem(bp, dp, ip, isrmdir, prevdirre panic("newdirrem: whiteout"); dvp = ITOV(dp); /* - * If we are over our limit, try to improve the situation. 
+ * If the system is over its limit and our filesystem is + * responsible for more than our share of that usage and + * we are not a snapshot, request some inodedep cleanup. * Limiting the number of dirrem structures will also limit * the number of freefile and freeblks structures. */ ACQUIRE_LOCK(ip->i_ump); - if (!IS_SNAPSHOT(ip) && dep_current[D_DIRREM] > max_softdeps / 2) + while (!IS_SNAPSHOT(ip) && dep_current[D_DIRREM] > max_softdeps / 2 && + ip->i_ump->softdep_curdeps[D_DIRREM] > + (max_softdeps / 2) / stat_flush_threads) (void) request_cleanup(ITOV(dp)->v_mount, FLUSH_BLOCKS); FREE_LOCK(ip->i_ump); dirrem = malloc(sizeof(struct dirrem), @@ -9914,7 +10013,7 @@ initiate_write_filepage(pagedep, bp) * Wait for all journal remove dependencies to hit the disk. * We can not allow any potentially conflicting directory adds * to be visible before removes and rollback is too difficult. - * The soft updates lock may be dropped and re-acquired, however + * The per-filesystem lock may be dropped and re-acquired, however * we hold the buf locked so the dependency can not go away. */ LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) @@ -10378,7 +10477,6 @@ cancel_indirdep(indirdep, bp, freeblks) LIST_REMOVE(indirdep, ir_next); } indirdep->ir_state |= GOINGAWAY; - VFSTOUFS(indirdep->ir_list.wk_mp)->softdep_numindirdeps += 1; /* * Pass in bp for blocks still have journal writes * pending so we can cancel them on their own. @@ -10805,7 +10903,7 @@ softdep_disk_write_complete(bp) ACQUIRE_LOCK(ump); while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { WORKLIST_REMOVE(wk); - dep_write[wk->wk_type]++; + atomic_add_long(&dep_write[wk->wk_type], 1); if (wk == owk) panic("duplicate worklist: %p\n", wk); owk = wk; @@ -11488,7 +11586,7 @@ diradd_inode_written(dap, inodedep) /* * Returns true if the bmsafemap will have rollbacks when written. Must only - * be called with the soft updates lock and the buf lock on the cg held. 
+ * be called with the per-filesystem lock and the buf lock on the cg held. */ static int bmsafemap_backgroundwrite(bmsafemap, bp) @@ -12912,18 +13010,42 @@ softdep_slowdown(vp) if (journal_space(ump, 0) == 0) jlow = 1; } + /* + * If the system is under its limits and our filesystem is + * not responsible for more than our share of the usage and + * we are not low on journal space, then no need to slow down. + */ max_softdeps_hard = max_softdeps * 11 / 10; if (dep_current[D_DIRREM] < max_softdeps_hard / 2 && dep_current[D_INODEDEP] < max_softdeps_hard && - VFSTOUFS(vp->v_mount)->softdep_numindirdeps < maxindirdeps && - dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0) { + dep_current[D_INDIRDEP] < max_softdeps_hard / 1000 && + dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0 && + ump->softdep_curdeps[D_DIRREM] < + (max_softdeps_hard / 2) / stat_flush_threads && + ump->softdep_curdeps[D_INODEDEP] < + max_softdeps_hard / stat_flush_threads && + ump->softdep_curdeps[D_INDIRDEP] < + (max_softdeps_hard / 1000) / stat_flush_threads && + ump->softdep_curdeps[D_FREEBLKS] < + max_softdeps_hard / stat_flush_threads) { FREE_LOCK(ump); return (0); } - if (VFSTOUFS(vp->v_mount)->softdep_numindirdeps >= maxindirdeps || jlow) - softdep_speedup(); + /* + * If the journal is low or our filesystem is over its limit + * then speedup the cleanup. + */ + if (ump->softdep_curdeps[D_INDIRDEP] < + (max_softdeps_hard / 1000) / stat_flush_threads || jlow) + softdep_speedup(ump); stat_sync_limit_hit += 1; FREE_LOCK(ump); + /* + * We only slow down the rate at which new dependencies are + * generated if we are not using journaling. With journaling, + * the cleanup should always be sufficient to keep things + * under control. + */ if (DOINGSUJ(vp)) return (0); return (1); @@ -12981,13 +13103,12 @@ softdep_request_cleanup(fs, vp, cred, re return (0); } /* - * If we are in need of resources, consider pausing for - * tickdelay to give ourselves some breathing room. 
+ * If we are in need of resources, start by cleaning up + * any block removals associated with our inode. */ ACQUIRE_LOCK(ump); process_removes(vp); process_truncates(vp); - request_cleanup(UFSTOVFS(ump), resource); FREE_LOCK(ump); /* * Now clean up at least as many resources as we will need. @@ -13120,7 +13241,7 @@ request_cleanup(mp, resource) * Next, we attempt to speed up the syncer process. If that * is successful, then we allow the process to continue. */ - if (softdep_speedup() && + if (softdep_speedup(ump) && resource != FLUSH_BLOCKS_WAIT && resource != FLUSH_INODES_WAIT) return(0); @@ -13138,15 +13259,19 @@ request_cleanup(mp, resource) case FLUSH_INODES: case FLUSH_INODES_WAIT: + ACQUIRE_GBLLOCK(&lk); stat_ino_limit_push += 1; req_clear_inodedeps += 1; + FREE_GBLLOCK(&lk); stat_countp = &stat_ino_limit_hit; break; case FLUSH_BLOCKS: case FLUSH_BLOCKS_WAIT: + ACQUIRE_GBLLOCK(&lk); stat_blk_limit_push += 1; req_clear_remove += 1; + FREE_GBLLOCK(&lk); stat_countp = &stat_blk_limit_hit; break; @@ -13157,6 +13282,8 @@ request_cleanup(mp, resource) * Hopefully the syncer daemon will catch up and awaken us. * We wait at most tickdelay before proceeding in any case. */ + ACQUIRE_GBLLOCK(&lk); + FREE_LOCK(ump); proc_waiting += 1; if (callout_pending(&softdep_callout) == FALSE) callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2, @@ -13164,6 +13291,8 @@ request_cleanup(mp, resource) msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0); proc_waiting -= 1; + FREE_GBLLOCK(&lk); + ACQUIRE_LOCK(ump); return (1); } @@ -13177,16 +13306,13 @@ pause_timer(arg) void *arg; { - rw_assert(&lk, RA_WLOCKED); + GBLLOCK_OWNED(&lk); /* * The callout_ API has acquired mtx and will hold it around this * function call. */ - *stat_countp += 1; - wakeup_one(&proc_waiting); - if (proc_waiting > 0) - callout_reset(&softdep_callout, tickdelay > 2 ? 
tickdelay : 2, - pause_timer, 0); + *stat_countp += proc_waiting; + wakeup(&proc_waiting); } /* @@ -13197,7 +13323,6 @@ check_clear_deps(mp) struct mount *mp; { - rw_assert(&lk, RA_WLOCKED); /* * If we are suspended, it may be because of our using * too many inodedeps, so help clear them out. @@ -13207,16 +13332,22 @@ check_clear_deps(mp) /* * General requests for cleanup of backed up dependencies */ + ACQUIRE_GBLLOCK(&lk); if (req_clear_inodedeps) { req_clear_inodedeps -= 1; + FREE_GBLLOCK(&lk); clear_inodedeps(mp); - wakeup_one(&proc_waiting); + ACQUIRE_GBLLOCK(&lk); + wakeup(&proc_waiting); } if (req_clear_remove) { req_clear_remove -= 1; + FREE_GBLLOCK(&lk); clear_remove(mp); - wakeup_one(&proc_waiting); + ACQUIRE_GBLLOCK(&lk); + wakeup(&proc_waiting); } + FREE_GBLLOCK(&lk); } /* Modified: head/sys/ufs/ffs/softdep.h ============================================================================== --- head/sys/ufs/ffs/softdep.h Mon Aug 4 21:41:01 2014 (r269532) +++ head/sys/ufs/ffs/softdep.h Mon Aug 4 22:03:58 2014 (r269533) @@ -1025,7 +1025,7 @@ TAILQ_HEAD(indir_hashhead, freework); * Allocated at mount and freed at unmount. */ struct mount_softdeps { - struct rwlock *sd_fslock; /* softdep lock */ + struct rwlock sd_fslock; /* softdep lock */ struct workhead sd_workitem_pending; /* softdep work queue */ struct worklist *sd_worklist_tail; /* Tail pointer for above */ struct workhead sd_journal_pending; /* journal work queue */ @@ -1046,15 +1046,24 @@ struct mount_softdeps { u_long sd_bmhashsize; /* bmsafemap hash table size-1*/ struct indir_hashhead *sd_indirhash; /* indir hash table */ u_long sd_indirhashsize; /* indir hash table size-1 */ - long sd_numindirdeps; /* outstanding indirdeps */ int sd_on_journal; /* Items on the journal list */ int sd_on_worklist; /* Items on the worklist */ int sd_deps; /* Total dependency count */ int sd_accdeps; /* accumulated dep count */ int sd_req; /* Wakeup when deps hits 0. 
*/ + int sd_flags; /* comm with flushing thread */ + int sd_cleanups; /* Calls to cleanup */ + struct thread *sd_flushtd; /* thread handling flushing */ + TAILQ_ENTRY(mount_softdeps) sd_next; /* List of softdep filesystem */ + struct ufsmount *sd_ump; /* our ufsmount structure */ u_long sd_curdeps[D_LAST + 1]; /* count of current deps */ }; /* + * Flags for communicating with the syncer thread. + */ +#define FLUSH_EXIT 0x0001 /* time to exit */ +#define FLUSH_CLEANUP 0x0002 /* need to clear out softdep structures */ +/* * Keep the old names from when these were in the ufsmount structure. */ #define softdep_workitem_pending um_softdep->sd_workitem_pending @@ -1077,10 +1086,11 @@ struct mount_softdeps { #define bmsafemap_hash_size um_softdep->sd_bmhashsize #define indir_hashtbl um_softdep->sd_indirhash #define indir_hash_size um_softdep->sd_indirhashsize -#define softdep_numindirdeps um_softdep->sd_numindirdeps #define softdep_on_journal um_softdep->sd_on_journal #define softdep_on_worklist um_softdep->sd_on_worklist #define softdep_deps um_softdep->sd_deps #define softdep_accdeps um_softdep->sd_accdeps #define softdep_req um_softdep->sd_req +#define softdep_flags um_softdep->sd_flags +#define softdep_flushtd um_softdep->sd_flushtd #define softdep_curdeps um_softdep->sd_curdeps