Date: Tue, 13 Mar 2018 18:33:51 +0000 (UTC)
From: Mark Johnston <markj@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r330873 - in user/markj/vm-playground/sys: kern sys vm
Message-ID: <201803131833.w2DIXpWU071061@repo.freebsd.org>
Author: markj
Date: Tue Mar 13 18:33:50 2018
New Revision: 330873
URL: https://svnweb.freebsd.org/changeset/base/330873

Log:
  Merge from user/jeff/numa at r330828.

Modified:
  user/markj/vm-playground/sys/kern/kern_cpuset.c
  user/markj/vm-playground/sys/kern/vfs_bio.c
  user/markj/vm-playground/sys/sys/_bitset.h
  user/markj/vm-playground/sys/sys/domainset.h
  user/markj/vm-playground/sys/sys/proc.h
  user/markj/vm-playground/sys/vm/vm_domainset.c
  user/markj/vm-playground/sys/vm/vm_page.c
  user/markj/vm-playground/sys/vm/vm_pageout.c
  user/markj/vm-playground/sys/vm/vm_phys.h
  user/markj/vm-playground/sys/vm/vm_reserv.c
  user/markj/vm-playground/sys/vm/vnode_pager.c

Directory Properties:
  user/markj/vm-playground/   (props changed)

Modified: user/markj/vm-playground/sys/kern/kern_cpuset.c
==============================================================================
--- user/markj/vm-playground/sys/kern/kern_cpuset.c	Tue Mar 13 18:30:26 2018	(r330872)
+++ user/markj/vm-playground/sys/kern/kern_cpuset.c	Tue Mar 13 18:33:50 2018	(r330873)
@@ -37,6 +37,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <sys/ctype.h>
 #include <sys/sysproto.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
@@ -112,6 +114,9 @@ __FBSDID("$FreeBSD$");
 * meaning 'curthread'.  It may query available cpus for that tid with a
 * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...).
 */
+
+LIST_HEAD(domainlist, domainset);
+
 static uma_zone_t cpuset_zone;
 static uma_zone_t domainset_zone;
 static struct mtx cpuset_lock;
@@ -119,6 +124,7 @@ static struct setlist cpuset_ids;
 static struct domainlist cpuset_domains;
 static struct unrhdr *cpuset_unr;
 static struct cpuset *cpuset_zero, *cpuset_default, *cpuset_kernel;
+static struct domainset domainset0, domainset2;

 /* Return the size of cpuset_t at the kernel level */
 SYSCTL_INT(_kern_sched, OID_AUTO, cpusetsize, CTLFLAG_RD | CTLFLAG_CAPRD,
@@ -477,11 +483,24 @@ _domainset_create(struct domainset *domain, struct dom
 /*
  * Create or lookup a domainset based on the key held in 'domain'.
  */
-static struct domainset *
+struct domainset *
 domainset_create(const struct domainset *domain)
 {
 	struct domainset *ndomain;

+	/*
+	 * Validate the policy.  It must specify a usable policy number with
+	 * only valid domains.  Preferred must include the preferred domain
+	 * in the mask.
+	 */
+	if (domain->ds_policy <= DOMAINSET_POLICY_INVALID ||
+	    domain->ds_policy > DOMAINSET_POLICY_MAX)
+		return (NULL);
+	if (domain->ds_policy == DOMAINSET_POLICY_PREFER &&
+	    !DOMAINSET_ISSET(domain->ds_prefer, &domain->ds_mask))
+		return (NULL);
+	if (!DOMAINSET_SUBSET(&domainset0.ds_mask, &domain->ds_mask))
+		return (NULL);
 	ndomain = uma_zalloc(domainset_zone, M_WAITOK | M_ZERO);
 	domainset_copy(domain, ndomain);
 	return _domainset_create(ndomain, NULL);
@@ -1132,6 +1151,55 @@ out:
 	return (error);
 }

+static int
+bitset_strprint(char *buf, size_t bufsiz, const struct bitset *set, int setlen)
+{
+	size_t bytes;
+	int i, once;
+	char *p;
+
+	once = 0;
+	p = buf;
+	for (i = 0; i < __bitset_words(setlen); i++) {
+		if (once != 0) {
+			if (bufsiz < 1)
+				return (0);
+			*p = ',';
+			p++;
+			bufsiz--;
+		} else
+			once = 1;
+		if (bufsiz < sizeof(__STRING(ULONG_MAX)))
+			return (0);
+		bytes = snprintf(p, bufsiz, "%lx", set->__bits[i]);
+		p += bytes;
+		bufsiz -= bytes;
+	}
+	return (p - buf);
+}
+
+static int
+bitset_strscan(struct bitset *set, int setlen, const char *buf)
+{
+	int i, ret;
+	const char *p;
+
+	BIT_ZERO(setlen, set);
+	p = buf;
+	for (i = 0; i < __bitset_words(setlen); i++) {
+		if (*p == ',') {
+			p++;
+			continue;
+		}
+		ret = sscanf(p, "%lx", &set->__bits[i]);
+		if (ret == 0 || ret == -1)
+			break;
+		while (isxdigit(*p))
+			p++;
+	}
+	return (p - buf);
+}
+
 /*
  * Return a string representing a valid layout for a cpuset_t object.
  * It expects an incoming buffer at least sized as CPUSETBUFSIZ.
@@ -1139,19 +1207,9 @@ out:
 char *
 cpusetobj_strprint(char *buf, const cpuset_t *set)
 {
-	char *tbuf;
-	size_t i, bytesp, bufsiz;
-
-	tbuf = buf;
-	bytesp = 0;
-	bufsiz = CPUSETBUFSIZ;
-
-	for (i = 0; i < (_NCPUWORDS - 1); i++) {
-		bytesp = snprintf(tbuf, bufsiz, "%lx,", set->__bits[i]);
-		bufsiz -= bytesp;
-		tbuf += bytesp;
-	}
-	snprintf(tbuf, bufsiz, "%lx", set->__bits[_NCPUWORDS - 1]);
+	bitset_strprint(buf, CPUSETBUFSIZ, (const struct bitset *)set,
+	    CPU_SETSIZE);
 	return (buf);
 }

@@ -1162,37 +1220,71 @@ cpusetobj_strprint(char *buf, const cpuset_t *set)
 int
 cpusetobj_strscan(cpuset_t *set, const char *buf)
 {
-	u_int nwords;
-	int i, ret;
+	char p;

 	if (strlen(buf) > CPUSETBUFSIZ - 1)
 		return (-1);

-	/* Allow to pass a shorter version of the mask when necessary. */
-	nwords = 1;
-	for (i = 0; buf[i] != '\0'; i++)
-		if (buf[i] == ',')
-			nwords++;
-	if (nwords > _NCPUWORDS)
+	p = buf[bitset_strscan((struct bitset *)set, CPU_SETSIZE, buf)];
+	if (p != '\0')
 		return (-1);
-	CPU_ZERO(set);
-	for (i = 0; i < (nwords - 1); i++) {
-		ret = sscanf(buf, "%lx,", &set->__bits[i]);
-		if (ret == 0 || ret == -1)
-			return (-1);
-		buf = strstr(buf, ",");
-		if (buf == NULL)
-			return (-1);
-		buf++;
-	}
-	ret = sscanf(buf, "%lx", &set->__bits[nwords - 1]);
-	if (ret == 0 || ret == -1)
-		return (-1);
 	return (0);
 }
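A note on the string format used by the new bitset_strprint()/bitset_strscan()
helpers above: a mask is written as comma-separated hexadecimal words, lowest
word first.  The userland sketch below round-trips that shape with plain
arrays; the word count, buffer size, and helper names are illustrative
assumptions, not kernel interfaces.

	/*
	 * Userland sketch of the comma-separated hex-word mask format
	 * produced and consumed by the kernel helpers above.  NWORDS and
	 * the buffer size are made-up stand-ins for __bitset_words() and
	 * CPUSETBUFSIZ.
	 */
	#include <stdio.h>
	#include <string.h>

	#define	NWORDS	2

	static int
	mask_print(char *buf, size_t bufsiz, const unsigned long *words)
	{
		char *p = buf;
		int i;

		for (i = 0; i < NWORDS; i++)
			p += snprintf(p, bufsiz - (p - buf),
			    i ? ",%lx" : "%lx", words[i]);
		return (p - buf);
	}

	static int
	mask_scan(unsigned long *words, const char *buf)
	{
		const char *p = buf;
		int i;

		memset(words, 0, sizeof(unsigned long) * NWORDS);
		for (i = 0; i < NWORDS && *p != '\0'; i++) {
			if (sscanf(p, "%lx", &words[i]) != 1)
				return (-1);
			p = strchr(p, ',');
			if (p == NULL)
				break;
			p++;
		}
		return (0);
	}

	int
	main(void)
	{
		unsigned long in[NWORDS] = { 0xdeadbeefUL, 0x1UL };
		unsigned long out[NWORDS];
		char buf[64];

		mask_print(buf, sizeof(buf), in);	/* "deadbeef,1" */
		mask_scan(out, buf);
		printf("%s -> %lx,%lx\n", buf, out[0], out[1]);
		return (0);
	}

Running the sketch prints "deadbeef,1 -> deadbeef,1", the same shape
cpusetobj_strprint() emits for a two-word set.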
 /*
+ * Handle a domainset specifier in the sysctl tree.  A pointer to a pointer to
+ * a domainset is in arg1.  If the user specifies a valid domainset the
+ * pointer is updated.
+ *
+ * Format is:
+ * hex mask word 0,hex mask word 1,...:decimal policy:decimal preferred
+ */
+int
+sysctl_handle_domainset(SYSCTL_HANDLER_ARGS)
+{
+	char buf[DOMAINSETBUFSIZ];
+	struct domainset *dset;
+	struct domainset key;
+	char *p;
+	int error;
+
+	dset = *(struct domainset **)arg1;
+	error = 0;
+
+	if (dset != NULL) {
+		p = buf + bitset_strprint(buf, DOMAINSETBUFSIZ,
+		    (const struct bitset *)&dset->ds_mask, DOMAINSET_SETSIZE);
+		sprintf(p, ":%d:%d", dset->ds_policy, dset->ds_prefer);
+	} else
+		sprintf(buf, "<NULL>");
+	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	/*
+	 * Read in and validate the string.
+	 */
+	memset(&key, 0, sizeof(key));
+	p = &buf[bitset_strscan((struct bitset *)&key.ds_mask,
+	    DOMAINSET_SETSIZE, buf)];
+	if (p == buf)
+		return (EINVAL);
+	if (sscanf(p, ":%hd:%hhd", &key.ds_policy, &key.ds_prefer) != 2)
+		return (EINVAL);
+
+	/* domainset_create() validates the policy. */
+	dset = domainset_create(&key);
+	if (dset == NULL)
+		return (EINVAL);
+	*(struct domainset **)arg1 = dset;
+
+	return (error);
+}
+
+#ifdef DDB
+
+/*
  * Apply an anonymous mask or a domain to a single thread.
  */
 static int
@@ -1256,8 +1348,6 @@ cpuset_setithread(lwpid_t id, int cpu)

 /*
  * Create the domainset for cpuset 0, 1 and cpuset 2.
  */
-static struct domainset domainset0, domainset2;
-
 void
 domainset_zero(void)
 {
@@ -2079,8 +2169,6 @@ out:
 	return (error);
 }

-#ifdef DDB
-BITSET_DEFINE(bitset, 1);
 static void
 ddb_display_bitset(const struct bitset *set, int size)
 {
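The new sysctl handler serializes a domainset as mask words, a policy number,
and a preferred domain, colon-separated.  A minimal sketch of composing and
re-parsing that shape with the same printf/scanf conversions the handler uses
(the single-word mask and the concrete policy and prefer values are invented
for illustration, not values defined by this commit):

	/*
	 * Sketch of the "maskwords:policy:prefer" string format handled
	 * by sysctl_handle_domainset() above.  The numeric values are
	 * illustrative assumptions.
	 */
	#include <stdio.h>

	int
	main(void)
	{
		char buf[64];
		unsigned long mask = 0x3;	/* domains 0 and 1 */
		short policy = 3;		/* e.g. a "prefer" policy */
		signed char prefer = 1;		/* preferred domain */
		unsigned long nmask;
		short npolicy;
		signed char nprefer;

		snprintf(buf, sizeof(buf), "%lx:%hd:%hhd",
		    mask, policy, prefer);
		if (sscanf(buf, "%lx:%hd:%hhd",
		    &nmask, &npolicy, &nprefer) != 3)
			return (1);
		printf("%s -> mask %lx policy %hd prefer %hhd\n", buf,
		    nmask, npolicy, nprefer);
		return (0);
	}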
Modified: user/markj/vm-playground/sys/kern/vfs_bio.c
==============================================================================
--- user/markj/vm-playground/sys/kern/vfs_bio.c	Tue Mar 13 18:30:26 2018	(r330872)
+++ user/markj/vm-playground/sys/kern/vfs_bio.c	Tue Mar 13 18:33:50 2018	(r330873)
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
+#include <sys/bitset.h>
 #include <sys/conf.h>
 #include <sys/counter.h>
 #include <sys/buf.h>
@@ -100,6 +101,7 @@ struct buf_ops buf_ops_bio = {
 	.bop_bdflush	=	bufbdflush,
 };

+struct bufdomain;
 static struct buf *buf;		/* buffer header pool */
 extern struct buf *swbuf;	/* Swap buffer header pool. */
 caddr_t unmapped_buf;
@@ -123,8 +125,8 @@ static int vfs_bio_clcheck(struct vnode *vp, int size,
 		daddr_t lblkno, daddr_t blkno);
 static void breada(struct vnode *, daddr_t *, int *, int, struct ucred *, int,
 		void (*)(struct buf *));
-static int buf_flush(struct vnode *vp, int);
-static int flushbufqueues(struct vnode *, int, int);
+static int buf_flush(struct vnode *vp, struct bufdomain *, int);
+static int flushbufqueues(struct vnode *, struct bufdomain *, int, int);
 static void buf_daemon(void);
 static __inline void bd_wakeup(void);
 static int sysctl_runningspace(SYSCTL_HANDLER_ARGS);
@@ -133,6 +135,7 @@ static void bufkva_free(struct buf *);
 static int buf_import(void *, void **, int, int, int);
 static void buf_release(void *, void **, int);
 static void maxbcachebuf_adjust(void);
+static inline struct bufdomain *bufdomain(struct buf *);
 static int sysctl_bufspace(SYSCTL_HANDLER_ARGS);

 int vmiodirenable = TRUE;
@@ -147,22 +150,22 @@ static counter_u64_t bufkvaspace;
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, bufkvaspace, CTLFLAG_RD, &bufkvaspace,
     "Kernel virtual memory used for buffers");
 static long maxbufspace;
-SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW, &maxbufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD, &maxbufspace, 0,
     "Maximum allowed value of bufspace (including metadata)");
 static long bufmallocspace;
 SYSCTL_LONG(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0,
     "Amount of malloced memory for buffers");
 static long maxbufmallocspace;
-SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace,
+SYSCTL_LONG(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RD, &maxbufmallocspace,
     0, "Maximum amount of malloced memory for buffers");
 static long lobufspace;
-SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RW, &lobufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0,
     "Minimum amount of buffers we want to have");
 long hibufspace;
-SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RW, &hibufspace, 0,
+SYSCTL_LONG(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0,
     "Maximum allowed value of bufspace (excluding metadata)");
 long bufspacethresh;
-SYSCTL_LONG(_vfs, OID_AUTO, bufspacethresh, CTLFLAG_RW, &bufspacethresh,
+SYSCTL_LONG(_vfs, OID_AUTO, bufspacethresh, CTLFLAG_RD, &bufspacethresh,
     0, "Bufspace consumed before waking the daemon to free some");
 static counter_u64_t buffreekvacnt;
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW, &buffreekvacnt,
@@ -190,26 +193,27 @@ SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_R
 static int recursiveflushes;
 SYSCTL_INT(_vfs, OID_AUTO, recursiveflushes, CTLFLAG_RW, &recursiveflushes, 0,
     "Number of flushes skipped due to being recursive");
-static int numdirtybuffers;
-SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0,
+static int sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vfs, OID_AUTO, numdirtybuffers,
+    CTLTYPE_INT|CTLFLAG_MPSAFE|CTLFLAG_RD, NULL, 0, sysctl_numdirtybuffers, "I",
     "Number of buffers that are dirty (has unwritten changes) at the moment");
 static int lodirtybuffers;
-SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0,
+SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RD, &lodirtybuffers, 0,
     "How many buffers we want to have free before bufdaemon can sleep");
 static int hidirtybuffers;
-SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0,
+SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RD, &hidirtybuffers, 0,
     "When the number of dirty buffers is considered severe");
 int dirtybufthresh;
-SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh,
+SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RD, &dirtybufthresh,
     0, "Number of bdwrite to bawrite conversions to clear dirty buffers");
 static int numfreebuffers;
 SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0,
     "Number of free buffers");
 static int lofreebuffers;
-SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0,
+SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RD, &lofreebuffers, 0,
    "Target number of free buffers");
 static int hifreebuffers;
-SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0,
+SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RD, &hifreebuffers, 0,
    "Threshold for clean buffer recycling");
 static counter_u64_t getnewbufcalls;
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RD,
@@ -308,10 +312,10 @@ struct bufqueue {
 #define	BQ_ASSERT_LOCKED(bq)	mtx_assert(BQ_LOCKPTR((bq)), MA_OWNED)

 struct bufqueue __exclusive_cache_line bqempty;
-struct bufqueue __exclusive_cache_line bqdirty;

 struct bufdomain {
 	struct bufqueue	bd_subq[MAXCPU + 1]; /* Per-cpu sub queues + global */
+	struct bufqueue bd_dirtyq;
 	struct bufqueue *bd_cleanq;
 	struct mtx_padalign bd_run_lock;
 	/* Constants */
@@ -321,10 +325,14 @@ struct bufdomain {
 	long		bd_bufspacethresh;
 	int		bd_hifreebuffers;
 	int		bd_lofreebuffers;
+	int		bd_hidirtybuffers;
+	int		bd_lodirtybuffers;
+	int		bd_dirtybufthresh;
 	int		bd_lim;
 	/* atomics */
 	int		bd_wanted;
-	int __aligned(CACHE_LINE_SIZE)	bd_running;
+	int __aligned(CACHE_LINE_SIZE)	bd_numdirtybuffers;
+	int __aligned(CACHE_LINE_SIZE)	bd_running;
 	long __aligned(CACHE_LINE_SIZE) bd_bufspace;
 	int __aligned(CACHE_LINE_SIZE)	bd_freebuffers;
 } __aligned(CACHE_LINE_SIZE);
@@ -336,15 +344,19 @@ struct bufdomain {
 #define	BD_RUN_LOCKPTR(bd)	(&(bd)->bd_run_lock)
 #define	BD_RUN_LOCK(bd)		mtx_lock(BD_RUN_LOCKPTR((bd)))
 #define	BD_RUN_UNLOCK(bd)	mtx_unlock(BD_RUN_LOCKPTR((bd)))
-#define	BD_DOMAIN(bd)		(bd - bdclean)
+#define	BD_DOMAIN(bd)		(bd - bdomain)

-/* Maximum number of clean buffer domains. */
-#define	CLEAN_DOMAINS	8
+/* Maximum number of buffer domains. */
+#define	BUF_DOMAINS	8

+BITSET_DEFINE(bufdomainset, BUF_DOMAINS);
+struct bufdomainset bdlodirty;		/* Domains > lodirty */
+struct bufdomainset bdhidirty;		/* Domains > hidirty */
+
 /* Configured number of clean queues. */
-static int __read_mostly clean_domains;
+static int __read_mostly buf_domains;

-struct bufdomain __exclusive_cache_line bdclean[CLEAN_DOMAINS];
+struct bufdomain __exclusive_cache_line bdomain[BUF_DOMAINS];

 static void bq_remove(struct bufqueue *bq, struct buf *bp);
 static void bq_insert(struct bufqueue *bq, struct buf *bp, bool unlock);
@@ -403,8 +415,8 @@ sysctl_bufspace(SYSCTL_HANDLER_ARGS)
 	int i;

 	lvalue = 0;
-	for (i = 0; i < clean_domains; i++)
-		lvalue += bdclean[i].bd_bufspace;
+	for (i = 0; i < buf_domains; i++)
+		lvalue += bdomain[i].bd_bufspace;
 	if (sizeof(int) == sizeof(long) || req->oldlen >= sizeof(long))
 		return (sysctl_handle_long(oidp, &lvalue, 0, req));
 	if (lvalue > INT_MAX)
@@ -421,12 +433,24 @@ sysctl_bufspace(SYSCTL_HANDLER_ARGS)
 	int i;

 	lvalue = 0;
-	for (i = 0; i < clean_domains; i++)
-		lvalue += bdclean[i].bd_bufspace;
+	for (i = 0; i < buf_domains; i++)
+		lvalue += bdomain[i].bd_bufspace;
 	return (sysctl_handle_long(oidp, &lvalue, 0, req));
 }
 #endif

+static int
+sysctl_numdirtybuffers(SYSCTL_HANDLER_ARGS)
+{
+	int value;
+	int i;
+
+	value = 0;
+	for (i = 0; i < buf_domains; i++)
+		value += bdomain[i].bd_numdirtybuffers;
+	return (sysctl_handle_int(oidp, &value, 0, req));
+}
+
 /*
  * bdirtywakeup:
  *
@@ -444,18 +468,59 @@ bdirtywakeup(void)
 }

 /*
+ * bd_clear:
+ *
+ *	Clear a domain from the appropriate bitsets when dirtybuffers
+ *	is decremented.
+ */
+static void
+bd_clear(struct bufdomain *bd)
+{
+
+	mtx_lock(&bdirtylock);
+	if (bd->bd_numdirtybuffers <= bd->bd_lodirtybuffers)
+		BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty);
+	if (bd->bd_numdirtybuffers <= bd->bd_hidirtybuffers)
+		BIT_CLR(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty);
+	mtx_unlock(&bdirtylock);
+}
+
+/*
+ * bd_set:
+ *
+ *	Set a domain in the appropriate bitsets when dirtybuffers
+ *	is incremented.
+ */
+static void
+bd_set(struct bufdomain *bd)
+{
+
+	mtx_lock(&bdirtylock);
+	if (bd->bd_numdirtybuffers > bd->bd_lodirtybuffers)
+		BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdlodirty);
+	if (bd->bd_numdirtybuffers > bd->bd_hidirtybuffers)
+		BIT_SET(BUF_DOMAINS, BD_DOMAIN(bd), &bdhidirty);
+	mtx_unlock(&bdirtylock);
+}
+
+/*
  * bdirtysub:
  *
  *	Decrement the numdirtybuffers count by one and wakeup any
  *	threads blocked in bwillwrite().
  */
 static void
-bdirtysub(void)
+bdirtysub(struct buf *bp)
 {
+	struct bufdomain *bd;
+	int num;

-	if (atomic_fetchadd_int(&numdirtybuffers, -1) ==
-	    (lodirtybuffers + hidirtybuffers) / 2)
+	bd = bufdomain(bp);
+	num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, -1);
+	if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2)
 		bdirtywakeup();
+	if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers)
+		bd_clear(bd);
 }

 /*
@@ -465,16 +530,21 @@ bdirtysub(void)
  *	daemon if needed.
  */
 static void
-bdirtyadd(void)
+bdirtyadd(struct buf *bp)
 {
+	struct bufdomain *bd;
+	int num;

 	/*
 	 * Only do the wakeup once as we cross the boundary.  The
 	 * buf daemon will keep running until the condition clears.
 	 */
-	if (atomic_fetchadd_int(&numdirtybuffers, 1) ==
-	    (lodirtybuffers + hidirtybuffers) / 2)
+	bd = bufdomain(bp);
+	num = atomic_fetchadd_int(&bd->bd_numdirtybuffers, 1);
+	if (num == (bd->bd_lodirtybuffers + bd->bd_hidirtybuffers) / 2)
 		bd_wakeup();
+	if (num == bd->bd_lodirtybuffers || num == bd->bd_hidirtybuffers)
+		bd_set(bd);
 }
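Worth spelling out about bdirtysub()/bdirtyadd() above: atomic_fetchadd_int()
returns the value before the update, so exactly one thread observes the
midpoint between the watermarks and performs the wakeup, even under
concurrency.  A userland analogue with C11 atomics (the watermark numbers are
invented for the example):

	/*
	 * Userland analogue of the bdirtyadd() wakeup test: fetch-add
	 * returns the previous value, so exactly one increment observes
	 * the midpoint and triggers the (simulated) wakeup.
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	#define	LODIRTY	20
	#define	HIDIRTY	80

	static atomic_int numdirty;

	static void
	dirty_add(void)
	{
		int old;

		old = atomic_fetch_add(&numdirty, 1);
		if (old == (LODIRTY + HIDIRTY) / 2)
			printf("crossed midpoint at %d: wake daemon once\n",
			    old);
	}

	int
	main(void)
	{
		for (int i = 0; i < 100; i++)
			dirty_add();	/* prints exactly one message */
		return (0);
	}

The same old-value comparison against the exact watermark values is what lets
bd_set()/bd_clear() run only on transitions rather than on every update.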
 /*
@@ -539,11 +609,11 @@ bufspace_adjust(struct buf *bp, int bufsize)
 	KASSERT((bp->b_flags & B_MALLOC) == 0,
 	    ("bufspace_adjust: malloc buf %p", bp));
-	bd = &bdclean[bp->b_domain];
+	bd = bufdomain(bp);
 	diff = bufsize - bp->b_bufsize;
 	if (diff < 0) {
 		atomic_subtract_long(&bd->bd_bufspace, -diff);
-	} else {
+	} else if (diff > 0) {
 		space = atomic_fetchadd_long(&bd->bd_bufspace, diff);
 		/* Wake up the daemon on the transition. */
 		if (space < bd->bd_bufspacethresh &&
@@ -638,7 +708,7 @@ bufspace_wait(struct bufdomain *bd, struct vnode *vp,
 	 * recursion.
 	 */
 	td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
-	fl = buf_flush(vp, flushbufqtarget);
+	fl = buf_flush(vp, bd, flushbufqtarget);
 	td->td_pflags &= norunbuf;
 	BD_LOCK(bd);
 	if (fl != 0)
@@ -700,7 +770,6 @@ bufspace_daemon(void *arg)
 		if (buf_recycle(bd, false) != 0) {
 			if (bd_flushall(bd))
 				continue;
-			bd_speedup();
 			BD_LOCK(bd);
 			if (bd->bd_wanted) {
 				msleep(&bd->bd_wanted, BD_LOCKPTR(bd),
@@ -1026,7 +1095,6 @@ bufinit(void)
 	    ("maxbcachebuf (%d) must be >= MAXBSIZE (%d)\n", maxbcachebuf,
 	    MAXBSIZE));
 	bq_init(&bqempty, QUEUE_EMPTY, -1, "bufq empty lock");
-	bq_init(&bqdirty, QUEUE_DIRTY, -1, "bufq dirty lock");
 	mtx_init(&rbreqlock, "runningbufspace lock", NULL, MTX_DEF);
 	mtx_init(&bdlock, "buffer daemon lock", NULL, MTX_DEF);
 	mtx_init(&bdirtylock, "dirty buf lock", NULL, MTX_DEF);
@@ -1094,7 +1162,6 @@ bufinit(void)
 	 */
 	hidirtybuffers = nbuf / 4 + 20;
 	dirtybufthresh = hidirtybuffers * 9 / 10;
-	numdirtybuffers = 0;
 	/*
 	 * To support extreme low-memory systems, make sure hidirtybuffers
 	 * cannot eat up all available buffer space.  This occurs when our
@@ -1129,22 +1196,26 @@ bufinit(void)
 	 * One queue per-256mb up to the max.  More queues gives better
 	 * concurrency but less accurate LRU.
 	 */
-	clean_domains = MIN(howmany(maxbufspace, 256*1024*1024), CLEAN_DOMAINS);
-	for (i = 0 ; i < clean_domains; i++) {
+	buf_domains = MIN(howmany(maxbufspace, 256*1024*1024), BUF_DOMAINS);
+	for (i = 0 ; i < buf_domains; i++) {
 		struct bufdomain *bd;

-		bd = &bdclean[i];
+		bd = &bdomain[i];
 		bd_init(bd);
-		bd->bd_freebuffers = nbuf / clean_domains;
-		bd->bd_hifreebuffers = hifreebuffers / clean_domains;
-		bd->bd_lofreebuffers = lofreebuffers / clean_domains;
+		bd->bd_freebuffers = nbuf / buf_domains;
+		bd->bd_hifreebuffers = hifreebuffers / buf_domains;
+		bd->bd_lofreebuffers = lofreebuffers / buf_domains;
 		bd->bd_bufspace = 0;
-		bd->bd_maxbufspace = maxbufspace / clean_domains;
-		bd->bd_hibufspace = hibufspace / clean_domains;
-		bd->bd_lobufspace = lobufspace / clean_domains;
-		bd->bd_bufspacethresh = bufspacethresh / clean_domains;
+		bd->bd_maxbufspace = maxbufspace / buf_domains;
+		bd->bd_hibufspace = hibufspace / buf_domains;
+		bd->bd_lobufspace = lobufspace / buf_domains;
+		bd->bd_bufspacethresh = bufspacethresh / buf_domains;
+		bd->bd_numdirtybuffers = 0;
+		bd->bd_hidirtybuffers = hidirtybuffers / buf_domains;
+		bd->bd_lodirtybuffers = lodirtybuffers / buf_domains;
+		bd->bd_dirtybufthresh = dirtybufthresh / buf_domains;
 		/* Don't allow more than 2% of bufs in the per-cpu caches. */
-		bd->bd_lim = nbuf / clean_domains / 50 / mp_ncpus;
+		bd->bd_lim = nbuf / buf_domains / 50 / mp_ncpus;
 	}
 	getnewbufcalls = counter_u64_alloc(M_WAITOK);
 	getnewbufrestarts = counter_u64_alloc(M_WAITOK);
@@ -1328,6 +1399,13 @@ bpmap_qenter(struct buf *bp)
 	    (vm_offset_t)(bp->b_offset & PAGE_MASK));
 }

+static inline struct bufdomain *
+bufdomain(struct buf *bp)
+{
+
+	return (&bdomain[bp->b_domain]);
+}
+
 static struct bufqueue *
 bufqueue(struct buf *bp)
 {
@@ -1340,9 +1418,9 @@ bufqueue(struct buf *bp)
 	case QUEUE_EMPTY:
 		return (&bqempty);
 	case QUEUE_DIRTY:
-		return (&bqdirty);
+		return (&bufdomain(bp)->bd_dirtyq);
 	case QUEUE_CLEAN:
-		return (&bdclean[bp->b_domain].bd_subq[bp->b_subqueue]);
+		return (&bufdomain(bp)->bd_subq[bp->b_subqueue]);
 	default:
 		break;
 	}
@@ -1405,14 +1483,14 @@ binsfree(struct buf *bp, int qindex)
 		bq_remove(bq, bp);
 		BQ_UNLOCK(bq);
 	}
+	bd = bufdomain(bp);
 	if (qindex == QUEUE_CLEAN) {
-		bd = &bdclean[bp->b_domain];
 		if (bd->bd_lim != 0)
 			bq = &bd->bd_subq[PCPU_GET(cpuid)];
 		else
 			bq = bd->bd_cleanq;
 	} else
-		bq = &bqdirty;
+		bq = &bd->bd_dirtyq;
 	bq_insert(bq, bp, true);
 }

@@ -1440,7 +1518,7 @@ buf_free(struct buf *bp)
 	if (!LIST_EMPTY(&bp->b_dep))
 		buf_deallocate(bp);
 	bufkva_free(bp);
-	atomic_add_int(&bdclean[bp->b_domain].bd_freebuffers, 1);
+	atomic_add_int(&bufdomain(bp)->bd_freebuffers, 1);
 	BUF_UNLOCK(bp);
 	uma_zfree(buf_zone, bp);
 }
@@ -1716,9 +1794,10 @@ bd_init(struct bufdomain *bd)
 	int domain;
 	int i;

-	domain = bd - bdclean;
+	domain = bd - bdomain;
 	bd->bd_cleanq = &bd->bd_subq[mp_maxid + 1];
 	bq_init(bd->bd_cleanq, QUEUE_CLEAN, mp_maxid + 1, "bufq clean lock");
+	bq_init(&bd->bd_dirtyq, QUEUE_DIRTY, -1, "bufq dirty lock");
 	for (i = 0; i <= mp_maxid; i++)
 		bq_init(&bd->bd_subq[i], QUEUE_CLEAN, i,
 		    "bufq clean subqueue lock");
@@ -1810,7 +1889,7 @@ bq_insert(struct bufqueue *bq, struct buf *bp, bool un

 	if (bp->b_qindex != QUEUE_NONE)
 		panic("bq_insert: free buffer %p onto another queue?", bp);
-	bd = &bdclean[bp->b_domain];
+	bd = bufdomain(bp);
 	if (bp->b_flags & B_AGE) {
 		/* Place this buf directly on the real queue. */
 		if (bq->bq_index == QUEUE_CLEAN)
@@ -1927,8 +2006,8 @@ bufkva_reclaim(vmem_t *vmem, int flags)

 	done = false;
 	for (i = 0; i < 5; i++) {
-		for (q = 0; q < clean_domains; q++)
-			if (buf_recycle(&bdclean[q], true) != 0)
+		for (q = 0; q < buf_domains; q++)
+			if (buf_recycle(&bdomain[q], true) != 0)
 				done = true;
 		if (done)
 			break;
@@ -2320,7 +2399,7 @@ bdirty(struct buf *bp)
 	if ((bp->b_flags & B_DELWRI) == 0) {
 		bp->b_flags |= /* XXX B_DONE | */ B_DELWRI;
 		reassignbuf(bp);
-		bdirtyadd();
+		bdirtyadd(bp);
 	}
 }

@@ -2348,7 +2427,7 @@ bundirty(struct buf *bp)
 	if (bp->b_flags & B_DELWRI) {
 		bp->b_flags &= ~B_DELWRI;
 		reassignbuf(bp);
-		bdirtysub();
+		bdirtysub(bp);
 	}
 	/*
 	 * Since it is now being written, we can clear its deferred write flag.
@@ -2420,9 +2499,9 @@ void
 bwillwrite(void)
 {

-	if (numdirtybuffers >= hidirtybuffers) {
+	if (buf_dirty_count_severe()) {
 		mtx_lock(&bdirtylock);
-		while (numdirtybuffers >= hidirtybuffers) {
+		while (buf_dirty_count_severe()) {
 			bdirtywait = 1;
 			msleep(&bdirtywait, &bdirtylock, (PRIBIO + 4),
 			    "flswai", 0);
@@ -2438,7 +2517,7 @@ int
 buf_dirty_count_severe(void)
 {

-	return(numdirtybuffers >= hidirtybuffers);
+	return (!BIT_EMPTY(BUF_DOMAINS, &bdhidirty));
 }

 /*
@@ -2523,7 +2602,7 @@ brelse(struct buf *bp)
 	if (!LIST_EMPTY(&bp->b_dep))
 		buf_deallocate(bp);
 	if (bp->b_flags & B_DELWRI)
-		bdirtysub();
+		bdirtysub(bp);
 	bp->b_flags &= ~(B_DELWRI | B_CACHE);
 	if ((bp->b_flags & B_VMIO) == 0) {
 		allocbuf(bp, 0);
@@ -3136,9 +3215,9 @@ getnewbuf(struct vnode *vp, int slpflag, int slptimeo,
 	else
 		metadata = false;
 	if (vp == NULL)
-		bd = &bdclean[0];
+		bd = &bdomain[0];
 	else
-		bd = &bdclean[vp->v_bufobj.bo_domain];
+		bd = &bdomain[vp->v_bufobj.bo_domain];

 	counter_u64_add(getnewbufcalls, 1);
 	reserved = false;
@@ -3184,11 +3263,11 @@ static struct kproc_desc buf_kp = {
 SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);

 static int
-buf_flush(struct vnode *vp, int target)
+buf_flush(struct vnode *vp, struct bufdomain *bd, int target)
 {
 	int flushed;

-	flushed = flushbufqueues(vp, target, 0);
+	flushed = flushbufqueues(vp, bd, target, 0);
 	if (flushed == 0) {
 		/*
 		 * Could not find any buffers without rollback
@@ -3197,7 +3276,7 @@ buf_flush(struct vnode *vp, int target)
 		 */
 		if (vp != NULL && target > 2)
 			target /= 2;
-		flushbufqueues(vp, target, 1);
+		flushbufqueues(vp, bd, target, 1);
 	}
 	return (flushed);
 }
@@ -3205,6 +3284,8 @@ buf_flush(struct vnode *vp, int target)
 static void
 buf_daemon()
 {
+	struct bufdomain *bd;
+	int speedupreq;
 	int lodirty;
 	int i;

@@ -3217,11 +3298,11 @@ buf_daemon()
 	/*
 	 * Start the buf clean daemons as children threads.
 	 */
-	for (i = 0 ; i < clean_domains; i++) {
+	for (i = 0 ; i < buf_domains; i++) {
 		int error;

 		error = kthread_add((void (*)(void *))bufspace_daemon,
-		    &bdclean[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i);
+		    &bdomain[i], curproc, NULL, 0, 0, "bufspacedaemon-%d", i);
 		if (error)
 			panic("error %d spawning bufspace daemon", error);
 	}
@@ -3236,20 +3317,30 @@ buf_daemon()
 		mtx_unlock(&bdlock);
 		kproc_suspend_check(bufdaemonproc);

-		lodirty = lodirtybuffers;
-		if (bd_speedupreq) {
-			lodirty = numdirtybuffers / 2;
-			bd_speedupreq = 0;
-		}
+
 		/*
-		 * Do the flush.  Limit the amount of in-transit I/O we
-		 * allow to build up, otherwise we would completely saturate
-		 * the I/O system.
+		 * Save speedupreq for this pass and reset to capture new
+		 * requests.
 		 */
-		while (numdirtybuffers > lodirty) {
-			if (buf_flush(NULL, numdirtybuffers - lodirty) == 0)
-				break;
-			kern_yield(PRI_USER);
+		speedupreq = bd_speedupreq;
+		bd_speedupreq = 0;
+
+		/*
+		 * Flush each domain sequentially according to its level and
+		 * the speedup request.
+		 */
+		for (i = 0; i < buf_domains; i++) {
+			bd = &bdomain[i];
+			if (speedupreq)
+				lodirty = bd->bd_numdirtybuffers / 2;
+			else
+				lodirty = bd->bd_lodirtybuffers;
+			while (bd->bd_numdirtybuffers > lodirty) {
+				if (buf_flush(NULL, bd,
+				    bd->bd_numdirtybuffers - lodirty) == 0)
+					break;
+				kern_yield(PRI_USER);
+			}
 		}

 		/*
@@ -3263,7 +3354,7 @@ buf_daemon()
 		 * to avoid endless loops on unlockable buffers.
 		 */
 		mtx_lock(&bdlock);
-		if (numdirtybuffers <= lodirtybuffers) {
+		if (!BIT_EMPTY(BUF_DOMAINS, &bdlodirty)) {
			/*
			 * We reached our low water mark, reset the
			 * request and sleep until we are needed again.
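The buf_dirty_count_severe() change above replaces a global counter
comparison with a bitset emptiness test: each domain publishes whether it is
over its high watermark, and the severity check reduces to "is any bit set".
A simplified single-word sketch (the kernel versions manipulate these bits
under bdirtylock and go through the BIT_* macros rather than raw shifts):

	/*
	 * Sketch of the bitset-based severity test: one bit per buffer
	 * domain, set while that domain exceeds its high watermark.
	 */
	#include <stdbool.h>

	static unsigned long hidirty_mask;	/* one bit per domain */

	static void
	domain_mark_hidirty(int domain, bool over)
	{
		if (over)
			hidirty_mask |= 1UL << domain;
		else
			hidirty_mask &= ~(1UL << domain);
	}

	static bool
	dirty_count_severe(void)
	{
		/* Any domain over its high watermark is "severe". */
		return (hidirty_mask != 0);
	}

	int
	main(void)
	{
		domain_mark_hidirty(2, true);
		return (dirty_count_severe() ? 0 : 1);
	}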
@@ -3302,7 +3393,8 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW,
     0, "Number of buffers flushed with dependecies that require rollbacks");

 static int
-flushbufqueues(struct vnode *lvp, int target, int flushdeps)
+flushbufqueues(struct vnode *lvp, struct bufdomain *bd, int target,
+    int flushdeps)
 {
 	struct bufqueue *bq;
 	struct buf *sentinel;
@@ -3315,7 +3407,7 @@ flushbufqueues(struct vnode *lvp, int target, int flus
 	bool unlock;

 	flushed = 0;
-	bq = &bqdirty;
+	bq = &bd->bd_dirtyq;
 	bp = NULL;
 	sentinel = malloc(sizeof(struct buf), M_TEMP, M_WAITOK | M_ZERO);
 	sentinel->b_qindex = QUEUE_SENTINEL;
@@ -3651,7 +3743,7 @@ bp_unmapped_get_kva(struct buf *bp, daddr_t blkno, int
 			panic("GB_NOWAIT_BD and GB_UNMAPPED %p", bp);
 		}
 		counter_u64_add(mappingrestarts, 1);
-		bufspace_wait(&bdclean[bp->b_domain], bp->b_vp, gbflags, 0, 0);
+		bufspace_wait(bufdomain(bp), bp->b_vp, gbflags, 0, 0);
 	}
 has_addr:
 	if (need_mapping) {
@@ -3849,7 +3941,7 @@ loop:
 		 */
 		if (flags & GB_NOCREAT)
 			return NULL;
-		if (bdclean[bo->bo_domain].bd_freebuffers == 0 &&
+		if (bdomain[bo->bo_domain].bd_freebuffers == 0 &&
 		    TD_IS_IDLETHREAD(curthread))
 			return NULL;
@@ -3906,7 +3998,7 @@ loop:
 		if (gbincore(bo, blkno)) {
 			BO_UNLOCK(bo);
 			bp->b_flags |= B_INVAL;
-			bufspace_release(&bdclean[bp->b_domain], maxsize);
+			bufspace_release(bufdomain(bp), maxsize);
 			brelse(bp);
 			goto loop;
 		}
@@ -3941,7 +4033,7 @@ loop:
 		}

 		allocbuf(bp, size);
-		bufspace_release(&bdclean[bp->b_domain], maxsize);
+		bufspace_release(bufdomain(bp), maxsize);
 		bp->b_flags &= ~B_DONE;
 	}
 	CTR4(KTR_BUF, "getblk(%p, %ld, %d) = %p", vp, (long)blkno, size, bp);
@@ -3970,7 +4062,7 @@ geteblk(int size, int flags)
 		return (NULL);
 	}
 	allocbuf(bp, size);
-	bufspace_release(&bdclean[bp->b_domain], maxsize);
+	bufspace_release(bufdomain(bp), maxsize);
 	bp->b_flags |= B_INVAL;	/* b_dep cleared by getnewbuf() */
 	BUF_ASSERT_HELD(bp);
 	return (bp);
@@ -4839,7 +4931,7 @@ bufobj_init(struct bufobj *bo, void *private)
 	static volatile int bufobj_cleanq;

 	bo->bo_domain =
-	    atomic_fetchadd_int(&bufobj_cleanq, 1) % clean_domains;
+	    atomic_fetchadd_int(&bufobj_cleanq, 1) % buf_domains;
 	rw_init(BO_LOCKPTR(bo), "bufobj interlock");
 	bo->bo_private = private;
 	TAILQ_INIT(&bo->bo_clean.bv_hd);
@@ -5184,10 +5276,9 @@ DB_SHOW_COMMAND(bufqueues, bufqueues)
 	int i, j;

 	db_printf("bqempty: %d\n", bqempty.bq_len);
-	db_printf("bqdirty: %d\n", bqdirty.bq_len);

-	for (i = 0; i < clean_domains; i++) {
-		bd = &bdclean[i];
+	for (i = 0; i < buf_domains; i++) {
+		bd = &bdomain[i];
 		db_printf("Buf domain %d\n", i);
 		db_printf("\tfreebufs\t%d\n", bd->bd_freebuffers);
 		db_printf("\tlofreebufs\t%d\n", bd->bd_lofreebuffers);
@@ -5199,7 +5290,13 @@ DB_SHOW_COMMAND(bufqueues, bufqueues)
 		db_printf("\tlobufspace\t%ld\n", bd->bd_lobufspace);
 		db_printf("\tbufspacethresh\t%ld\n", bd->bd_bufspacethresh);
 		db_printf("\n");
+		db_printf("\tnumdirtybuffers\t%d\n", bd->bd_numdirtybuffers);
+		db_printf("\tlodirtybuffers\t%d\n", bd->bd_lodirtybuffers);
+		db_printf("\thidirtybuffers\t%d\n", bd->bd_hidirtybuffers);
+		db_printf("\tdirtybufthresh\t%d\n", bd->bd_dirtybufthresh);
+		db_printf("\n");
 		db_printf("\tcleanq count\t%d\n", bd->bd_cleanq->bq_len);
+		db_printf("\tdirtyq count\t%d\n", bd->bd_dirtyq.bq_len);
 		db_printf("\twakeup\t\t%d\n", bd->bd_wanted);
 		db_printf("\tlim\t\t%d\n", bd->bd_lim);
 		db_printf("\tCPU ");
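One more illustration before the header changes: bufobj_init() above spreads
new buffer objects across domains with a single atomic counter taken modulo
buf_domains, with no locking.  A userland sketch of the same round-robin
assignment (the domain count here is an arbitrary stand-in for the value the
kernel computes in bufinit()):

	/*
	 * Sketch of the lock-free round-robin domain assignment used by
	 * bufobj_init(): fetch-add a shared counter, reduce modulo the
	 * number of domains.
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	#define	NDOMAINS	4	/* illustrative */

	static atomic_int nextobj;

	static int
	assign_domain(void)
	{
		return (atomic_fetch_add(&nextobj, 1) % NDOMAINS);
	}

	int
	main(void)
	{
		for (int i = 0; i < 8; i++)
			printf("object %d -> domain %d\n", i,
			    assign_domain());
		return (0);
	}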
Modified: user/markj/vm-playground/sys/sys/_bitset.h
==============================================================================
--- user/markj/vm-playground/sys/sys/_bitset.h	Tue Mar 13 18:30:26 2018	(r330872)
+++ user/markj/vm-playground/sys/sys/_bitset.h	Tue Mar 13 18:33:50 2018	(r330873)
@@ -57,4 +57,10 @@ struct t {							\
  */
 #define	BITSET_DEFINE_VAR(t)	BITSET_DEFINE(t, 1)

+/*
+ * Define a default type that can be used while manually specifying size
+ * to every call.
+ */
+BITSET_DEFINE(bitset, 1);
+
 #endif /* !_SYS__BITSET_H_ */

Modified: user/markj/vm-playground/sys/sys/domainset.h
==============================================================================
--- user/markj/vm-playground/sys/sys/domainset.h	Tue Mar 13 18:30:26 2018	(r330872)
+++ user/markj/vm-playground/sys/sys/domainset.h	Tue Mar 13 18:33:50 2018	(r330873)
@@ -28,8 +28,8 @@
  * $FreeBSD$
  */

-#ifndef _SYS_DOMAINSETSET_H_
-#define _SYS_DOMAINSETSET_H_
+#ifndef _SYS_DOMAINSET_H_
+#define _SYS_DOMAINSET_H_

 #include <sys/_domainset.h>

@@ -38,8 +38,12 @@
 #define	_NDOMAINSETBITS			_BITSET_BITS
 #define	_NDOMAINSETWORDS		__bitset_words(DOMAINSET_SETSIZE)

-#define	DOMAINSETSETBUFSIZ	((2 + sizeof(long) * 2) * _NDOMAINSETWORDS)
+#define	DOMAINSETBUFSIZ							\
+	(((2 + sizeof(long) * 2) * _NDOMAINSETWORDS) +			\
+	    sizeof("::") + sizeof(__XSTRING(DOMAINSET_POLICY_MAX)) +	\
+	    sizeof(__XSTRING(MAXMEMDOM)))

 #define	DOMAINSET_CLR(n, p)		BIT_CLR(DOMAINSET_SETSIZE, n, p)
 #define	DOMAINSET_COPY(f, t)		BIT_COPY(DOMAINSET_SETSIZE, f, t)
 #define	DOMAINSET_ISSET(n, p)		BIT_ISSET(DOMAINSET_SETSIZE, n, p)
@@ -77,9 +81,6 @@
 #define	DOMAINSET_POLICY_MAX		DOMAINSET_POLICY_INTERLEAVE

 #ifdef _KERNEL
-#include <sys/queue.h>
-LIST_HEAD(domainlist, domainset);
-
 #if MAXMEMDOM < 256
 typedef uint8_t domainid_t;
 #else

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
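For reference, the DOMAINSETBUFSIZ definition in the domainset.h hunk above
budgets the sysctl string: per mask word, two hex digits per byte of a long
plus separator slack, then room for the two ':' delimiters and the stringified
maximum policy and domain numbers.  A compile-time sketch of that arithmetic
with stand-in constants (the real macro uses _NDOMAINSETWORDS and __XSTRING;
the values below are assumptions for illustration):

	/*
	 * Compile-time sketch of the DOMAINSETBUFSIZ arithmetic.  On LP64:
	 * (2 + 16) * 1 + sizeof("::") + sizeof("4") + sizeof("8") = 25.
	 */
	#include <assert.h>

	#define	NWORDS		1	/* stand-in for _NDOMAINSETWORDS */
	#define	POLICY_MAX_STR	"4"	/* __XSTRING(DOMAINSET_POLICY_MAX) */
	#define	MAXMEMDOM_STR	"8"	/* __XSTRING(MAXMEMDOM) */

	#define	BUFSIZ_SKETCH						\
		(((2 + sizeof(long) * 2) * NWORDS) +			\
		    sizeof("::") + sizeof(POLICY_MAX_STR) +		\
		    sizeof(MAXMEMDOM_STR))

	static_assert(BUFSIZ_SKETCH >= sizeof("ffffffffffffffff:4:8"),
	    "buffer must hold the longest encoded domainset");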