Date: Fri, 15 Dec 2017 23:35:20 +0000 (UTC) From: Jeff Roberson <jeff@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r326889 - in user/jeff/numa: lib/libc/sys sys/compat/freebsd32 sys/conf sys/kern sys/sys sys/vm usr.bin/cpuset Message-ID: <201712152335.vBFNZKxj049606@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jeff Date: Fri Dec 15 23:35:20 2017 New Revision: 326889 URL: https://svnweb.freebsd.org/changeset/base/326889 Log: First cut of NUMA domain integration into cpuset. Added: user/jeff/numa/sys/sys/_domainset.h (contents, props changed) user/jeff/numa/sys/sys/domainset.h (contents, props changed) user/jeff/numa/sys/vm/vm_domainset.c (contents, props changed) user/jeff/numa/sys/vm/vm_domainset.h (contents, props changed) Modified: user/jeff/numa/lib/libc/sys/Symbol.map user/jeff/numa/sys/compat/freebsd32/syscalls.master user/jeff/numa/sys/conf/files user/jeff/numa/sys/kern/init_main.c user/jeff/numa/sys/kern/init_sysent.c user/jeff/numa/sys/kern/kern_cpuset.c user/jeff/numa/sys/kern/kern_exit.c user/jeff/numa/sys/kern/kern_fork.c user/jeff/numa/sys/kern/kern_numa.c user/jeff/numa/sys/kern/kern_thr.c user/jeff/numa/sys/kern/kern_thread.c user/jeff/numa/sys/kern/makesyscalls.sh user/jeff/numa/sys/kern/sched_4bsd.c user/jeff/numa/sys/kern/sched_ule.c user/jeff/numa/sys/kern/syscalls.c user/jeff/numa/sys/kern/syscalls.master user/jeff/numa/sys/kern/systrace_args.c user/jeff/numa/sys/sys/cpuset.h user/jeff/numa/sys/sys/param.h user/jeff/numa/sys/sys/proc.h user/jeff/numa/sys/sys/syscall.h user/jeff/numa/sys/sys/syscall.mk user/jeff/numa/sys/sys/syscallsubr.h user/jeff/numa/sys/sys/sysproto.h user/jeff/numa/sys/vm/uma_core.c user/jeff/numa/sys/vm/vm_fault.c user/jeff/numa/sys/vm/vm_kern.c user/jeff/numa/sys/vm/vm_object.c user/jeff/numa/sys/vm/vm_object.h user/jeff/numa/sys/vm/vm_page.c user/jeff/numa/sys/vm/vm_phys.c user/jeff/numa/usr.bin/cpuset/cpuset.c Modified: user/jeff/numa/lib/libc/sys/Symbol.map ============================================================================== --- user/jeff/numa/lib/libc/sys/Symbol.map Fri Dec 15 23:19:49 2017 (r326888) +++ user/jeff/numa/lib/libc/sys/Symbol.map Fri Dec 15 23:35:20 2017 (r326889) @@ -398,6 +398,8 @@ FBSD_1.5 { mknodat; stat; statfs; + cpuset_getdomain; + cpuset_setdomain; }; FBSDprivate_1.0 { @@ -1022,4 +1024,8 @@ FBSDprivate_1.0 { gssd_syscall; __libc_interposing_slot; __libc_sigwait; + _cpuset_getdomain; + __sys_cpuset_getdomain; + _cpuset_setdomain; + __sys_cpuset_setdomain; }; Modified: user/jeff/numa/sys/compat/freebsd32/syscalls.master ============================================================================== --- user/jeff/numa/sys/compat/freebsd32/syscalls.master Fri Dec 15 23:19:49 2017 (r326888) +++ user/jeff/numa/sys/compat/freebsd32/syscalls.master Fri Dec 15 23:35:20 2017 (r326889) @@ -1119,4 +1119,13 @@ struct kevent32 *eventlist, \ int nevents, \ const struct timespec32 *timeout); } +561 AUE_NULL STD { int cpuset_getdomain(cpulevel_t level, \ + cpuwhich_t which, id_t id, \ + size_t domainsetsize, domainset_t *mask, \ + int *policy); } +562 AUE_NULL STD { int cpuset_setdomain(cpulevel_t level, \ + cpuwhich_t which, id_t id, \ + size_t domainsetsize, domainset_t *mask, \ + int policy); } + ; vim: syntax=off Modified: user/jeff/numa/sys/conf/files ============================================================================== --- user/jeff/numa/sys/conf/files Fri Dec 15 23:19:49 2017 (r326888) +++ user/jeff/numa/sys/conf/files Fri Dec 15 23:35:20 2017 (r326889) @@ -4816,7 +4816,7 @@ vm/swap_pager.c standard vm/uma_core.c standard vm/uma_dbg.c standard vm/memguard.c optional DEBUG_MEMGUARD -vm/vm_domain.c standard +vm/vm_domainset.c standard vm/vm_fault.c standard vm/vm_glue.c standard vm/vm_init.c standard Modified: user/jeff/numa/sys/kern/init_main.c ============================================================================== --- user/jeff/numa/sys/kern/init_main.c Fri Dec 15 23:19:49 2017 (r326888) +++ user/jeff/numa/sys/kern/init_main.c Fri Dec 15 23:35:20 2017 (r326889) @@ -493,10 +493,7 @@ proc0_init(void *dummy __unused) td->td_flags = TDF_INMEM; td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); - vm_domain_policy_init(&td->td_vm_dom_policy); - vm_domain_policy_set(&td->td_vm_dom_policy, VM_POLICY_NONE, -1); - vm_domain_policy_init(&p->p_vm_dom_policy); - vm_domain_policy_set(&p->p_vm_dom_policy, VM_POLICY_NONE, -1); + td->td_domain.dr_policy = td->td_cpuset->cs_domain; prison0_init(); p->p_peers = 0; p->p_leader = p; Modified: user/jeff/numa/sys/kern/init_sysent.c ============================================================================== --- user/jeff/numa/sys/kern/init_sysent.c Fri Dec 15 23:19:49 2017 (r326888) +++ user/jeff/numa/sys/kern/init_sysent.c Fri Dec 15 23:35:20 2017 (r326889) @@ -612,4 +612,6 @@ struct sysent sysent[] = { { AS(fhstatfs_args), (sy_call_t *)sys_fhstatfs, AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 558 = fhstatfs */ { AS(mknodat_args), (sy_call_t *)sys_mknodat, AUE_MKNODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 559 = mknodat */ { AS(kevent_args), (sy_call_t *)sys_kevent, AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 560 = kevent */ + { AS(cpuset_getdomain_args), (sy_call_t *)sys_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 561 = cpuset_getdomain */ + { AS(cpuset_setdomain_args), (sy_call_t *)sys_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 562 = cpuset_setdomain */ }; Modified: user/jeff/numa/sys/kern/kern_cpuset.c ============================================================================== --- user/jeff/numa/sys/kern/kern_cpuset.c Fri Dec 15 23:19:49 2017 (r326888) +++ user/jeff/numa/sys/kern/kern_cpuset.c Fri Dec 15 23:35:20 2017 (r326889) @@ -51,17 +51,21 @@ __FBSDID("$FreeBSD$"); #include <sys/syscallsubr.h> #include <sys/capsicum.h> #include <sys/cpuset.h> +#include <sys/domainset.h> #include <sys/sx.h> #include <sys/queue.h> #include <sys/libkern.h> #include <sys/limits.h> #include <sys/bus.h> #include <sys/interrupt.h> +#include <sys/vmmeter.h> #include <vm/uma.h> #include <vm/vm.h> +#include <vm/vm_object.h> #include <vm/vm_page.h> #include <vm/vm_param.h> +#include <vm/vm_phys.h> #ifdef DDB #include <ddb/ddb.h> @@ -109,8 +113,10 @@ __FBSDID("$FreeBSD$"); * getaffinity call using (CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, ...). */ static uma_zone_t cpuset_zone; +static uma_zone_t domainset_zone; static struct mtx cpuset_lock; static struct setlist cpuset_ids; +static struct domainlist cpuset_domains; static struct unrhdr *cpuset_unr; static struct cpuset *cpuset_zero, *cpuset_default; @@ -122,6 +128,30 @@ cpuset_t *cpuset_root; cpuset_t cpuset_domain[MAXMEMDOM]; /* + * Find the first non-anonymous set starting from 'set'. + */ +static struct cpuset * +cpuset_getbase(struct cpuset *set) +{ + + if (set->cs_id == CPUSET_INVALID) + set = set->cs_parent; + return (set); +} + +/* + * Walks up the tree from 'set' to find the root. + */ +static struct cpuset * +cpuset_getroot(struct cpuset *set) +{ + + while ((set->cs_flags & CPU_SET_ROOT) == 0 && set->cs_parent != NULL) + set = set->cs_parent; + return (set); +} + +/* * Acquire a reference to a cpuset, all pointers must be tracked with refs. */ struct cpuset * @@ -140,12 +170,7 @@ static struct cpuset * cpuset_refroot(struct cpuset *set) { - for (; set->cs_parent != NULL; set = set->cs_parent) - if (set->cs_flags & CPU_SET_ROOT) - break; - cpuset_ref(set); - - return (set); + return cpuset_ref(cpuset_getroot(set)); } /* @@ -157,11 +182,7 @@ static struct cpuset * cpuset_refbase(struct cpuset *set) { - if (set->cs_id == CPUSET_INVALID) - set = set->cs_parent; - cpuset_ref(set); - - return (set); + return cpuset_ref(cpuset_getbase(set)); } /* @@ -257,17 +278,25 @@ cpuset_lookup(cpusetid_t setid, struct thread *td) * will have no valid cpu based on restrictions from the parent. */ static int -_cpuset_create(struct cpuset *set, struct cpuset *parent, const cpuset_t *mask, - cpusetid_t id) +_cpuset_create(struct cpuset *set, struct cpuset *parent, + const cpuset_t *mask, struct domainset *domain, cpusetid_t id) { + if (domain == NULL) + domain = parent->cs_domain; + if (mask == NULL) + mask = &parent->cs_mask; if (!CPU_OVERLAP(&parent->cs_mask, mask)) return (EDEADLK); + /* The domain must be prepared ahead of time. */ + if (!DOMAINSET_SUBSET(&parent->cs_domain->ds_mask, &domain->ds_mask)) + return (EDEADLK); CPU_COPY(mask, &set->cs_mask); LIST_INIT(&set->cs_children); refcount_init(&set->cs_ref, 1); set->cs_flags = 0; mtx_lock_spin(&cpuset_lock); + set->cs_domain = domain; CPU_AND(&set->cs_mask, &parent->cs_mask); set->cs_id = id; set->cs_parent = cpuset_ref(parent); @@ -294,8 +323,8 @@ cpuset_create(struct cpuset **setp, struct cpuset *par id = alloc_unr(cpuset_unr); if (id == -1) return (ENFILE); - *setp = set = uma_zalloc(cpuset_zone, M_WAITOK); - error = _cpuset_create(set, parent, mask, id); + *setp = set = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); + error = _cpuset_create(set, parent, mask, NULL, id); if (error == 0) return (0); free_unr(cpuset_unr, id); @@ -304,7 +333,187 @@ cpuset_create(struct cpuset **setp, struct cpuset *par return (error); } +static void +cpuset_freelist_add(struct setlist *list, int count) +{ + struct cpuset *set; + int i; + + for (i = 0; i < count; i++) { + set = uma_zalloc(cpuset_zone, M_ZERO | M_WAITOK); + LIST_INSERT_HEAD(list, set, cs_link); + } +} + +static void +cpuset_freelist_init(struct setlist *list, int count) +{ + + LIST_INIT(list); + cpuset_freelist_add(list, count); +} + +static void +cpuset_freelist_free(struct setlist *list) +{ + struct cpuset *set; + + while ((set = LIST_FIRST(list)) != NULL) { + LIST_REMOVE(set, cs_link); + uma_zfree(cpuset_zone, set); + } +} + +static void +domainset_freelist_add(struct domainlist *list, int count) +{ + struct domainset *set; + int i; + + for (i = 0; i < count; i++) { + set = uma_zalloc(domainset_zone, M_ZERO | M_WAITOK); + LIST_INSERT_HEAD(list, set, ds_link); + } +} + +static void +domainset_freelist_init(struct domainlist *list, int count) +{ + + LIST_INIT(list); + domainset_freelist_add(list, count); +} + +static void +domainset_freelist_free(struct domainlist *list) +{ + struct domainset *set; + + while ((set = LIST_FIRST(list)) != NULL) { + LIST_REMOVE(set, ds_link); + uma_zfree(domainset_zone, set); + } +} + +/* Copy a domainset preserving mask and policy. */ +static void +domainset_copy(const struct domainset *from, struct domainset *to) +{ + + DOMAINSET_COPY(&from->ds_mask, &to->ds_mask); + to->ds_policy = from->ds_policy; +} + +/* Return 1 if mask and policy are equal, otherwise 0. */ +static int +domainset_equal(const struct domainset *one, const struct domainset *two) +{ + + return (DOMAINSET_CMP(&one->ds_mask, &two->ds_mask) == 0 && + one->ds_policy == two->ds_policy); +} + /* + * Lookup or create a domainset. The key is provided in ds_mask and + * ds_policy. If the domainset does not yet exist the storage in + * 'domain' is used to insert. Otherwise this storage is freed to the + * domainset_zone and the existing domainset is returned. + */ +static struct domainset * +_domainset_create(struct domainset *domain, struct domainlist *freelist) +{ + struct domainset *ndomain; + + mtx_lock_spin(&cpuset_lock); + LIST_FOREACH(ndomain, &cpuset_domains, ds_link) + if (domainset_equal(ndomain, domain)) + break; + /* + * If the domain does not yet exist we insert it and initialize + * various iteration helpers which are not part of the key. + */ + if (ndomain == NULL) { + LIST_INSERT_HEAD(&cpuset_domains, domain, ds_link); + domain->ds_cnt = DOMAINSET_COUNT(&domain->ds_mask); + domain->ds_max = DOMAINSET_FLS(&domain->ds_mask) + 1; + } + mtx_unlock_spin(&cpuset_lock); + if (ndomain == NULL) + return (domain); + if (freelist != NULL) + LIST_INSERT_HEAD(freelist, domain, ds_link); + else + uma_zfree(domainset_zone, domain); + return (ndomain); + +} + +/* + * Create or lookup a domainset based on the key held in 'domain'. + */ +static struct domainset * +domainset_create(const struct domainset *domain) +{ + struct domainset *ndomain; + + ndomain = uma_zalloc(domainset_zone, M_WAITOK | M_ZERO); + domainset_copy(domain, ndomain); + return _domainset_create(ndomain, NULL); +} + +/* + * Update thread domainset pointers. + */ +static void +domainset_notify(void) +{ + struct thread *td; + struct proc *p; + + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NEW) { + PROC_UNLOCK(p); + continue; + } + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + td->td_domain.dr_policy = td->td_cpuset->cs_domain; + thread_unlock(td); + } + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + kernel_object->domain.dr_policy = cpuset_default->cs_domain; +} + +/* + * Create a new set that is a subset of a parent. + */ +static struct domainset * +domainset_shadow(const struct domainset *pdomain, + const struct domainset *domain, struct domainlist *freelist) +{ + struct domainset *ndomain; + + ndomain = LIST_FIRST(freelist); + LIST_REMOVE(ndomain, ds_link); + + /* + * Initialize the key from the request. + */ + domainset_copy(domain, ndomain); + + /* + * Restrict the key by the parent. + */ + DOMAINSET_AND(&ndomain->ds_mask, &pdomain->ds_mask); + + return _domainset_create(ndomain, freelist); +} + +/* * Recursively check for errors that would occur from applying mask to * the tree of sets starting at 'set'. Checks for sets that would become * empty as well as RDONLY flags. @@ -376,10 +585,12 @@ cpuset_modify(struct cpuset *set, cpuset_t *mask) * Verify that we have access to this set of * cpus. */ - root = set->cs_parent; - if (root && !CPU_SUBSET(&root->cs_mask, mask)) - return (EINVAL); + root = cpuset_getroot(set); mtx_lock_spin(&cpuset_lock); + if (root && !CPU_SUBSET(&root->cs_mask, mask)) { + error = EINVAL; + goto out; + } error = cpuset_testupdate(set, mask, 0); if (error) goto out; @@ -392,6 +603,136 @@ out: } /* + * Recursively check for errors that would occur from applying mask to + * the tree of sets starting at 'set'. Checks for sets that would become + * empty as well as RDONLY flags. + */ +static int +cpuset_testupdate_domain(struct cpuset *set, struct domainset *dset, + struct domainset *orig, int *count, int check_mask) +{ + struct cpuset *nset; + struct domainset *domain; + struct domainset newset; + int error; + + mtx_assert(&cpuset_lock, MA_OWNED); + if (set->cs_flags & CPU_SET_RDONLY) + return (EPERM); + domain = set->cs_domain; + domainset_copy(domain, &newset); + if (!domainset_equal(domain, orig)) { + if (!DOMAINSET_OVERLAP(&domain->ds_mask, &dset->ds_mask)) + return (EDEADLK); + DOMAINSET_AND(&newset.ds_mask, &dset->ds_mask); + /* Count the number of domains that are changing. */ + (*count)++; + } + error = 0; + LIST_FOREACH(nset, &set->cs_children, cs_siblings) + if ((error = cpuset_testupdate_domain(nset, &newset, domain, + count, 1)) != 0) + break; + return (error); +} + +/* + * Applies the mask 'mask' without checking for empty sets or permissions. + */ +static void +cpuset_update_domain(struct cpuset *set, struct domainset *domain, + struct domainset *orig, struct domainlist *domains) +{ + struct cpuset *nset; + + mtx_assert(&cpuset_lock, MA_OWNED); + /* + * If this domainset has changed from the parent we must calculate + * a new set. Otherwise it simply inherits from the parent. When + * we inherit from the parent we get a new mask and policy. If the + * set is modified from the parent we keep the policy and only + * update the mask. + */ + if (set->cs_domain != orig) { + orig = set->cs_domain; + set->cs_domain = domainset_shadow(domain, orig, domains); + } else + set->cs_domain = domain; + LIST_FOREACH(nset, &set->cs_children, cs_siblings) + cpuset_update_domain(nset, set->cs_domain, orig, domains); + + return; +} + +/* + * Modify the set 'set' to use a copy the domainset provided. Apply this new + * mask to restrict all children in the tree. Checks for validity before + * applying the changes. + */ +static int +cpuset_modify_domain(struct cpuset *set, struct domainset *domain) +{ + struct domainlist domains; + struct domainset temp; + struct domainset *dset; + struct cpuset *root; + int ndomains, needed; + int error; + + error = priv_check(curthread, PRIV_SCHED_CPUSET); + if (error) + return (error); + /* + * In case we are called from within the jail + * we do not allow modifying the dedicated root + * cpuset of the jail but may still allow to + * change child sets. + */ + if (jailed(curthread->td_ucred) && + set->cs_flags & CPU_SET_ROOT) + return (EPERM); + domainset_freelist_init(&domains, 0); + domain = domainset_create(domain); + ndomains = needed = 0; + do { + if (ndomains < needed) { + domainset_freelist_add(&domains, needed - ndomains); + ndomains = needed; + } + root = cpuset_getroot(set); + mtx_lock_spin(&cpuset_lock); + dset = root->cs_domain; + /* + * Verify that we have access to this set of domains. + */ + if (root && + !DOMAINSET_SUBSET(&dset->ds_mask, &domain->ds_mask)) { + error = EINVAL; + goto out; + } + /* + * Determine whether we can apply this set of domains and + * how many new domain structures it will require. + */ + domainset_copy(domain, &temp); + needed = 0; + error = cpuset_testupdate_domain(set, &temp, set->cs_domain, + &needed, 0); + if (error) + goto out; + } while (ndomains < needed); + dset = set->cs_domain; + cpuset_update_domain(set, domain, dset, &domains); +out: + mtx_unlock_spin(&cpuset_lock); + domainset_freelist_free(&domains); + if (error == 0) + domainset_notify(); + + return (error); +} + +/* * Resolve the 'which' parameter of several cpuset apis. * * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid. Also @@ -481,44 +822,204 @@ cpuset_which(cpuwhich_t which, id_t id, struct proc ** return (0); } +static int +cpuset_testshadow(struct cpuset *set, const cpuset_t *mask, + const struct domainset *domain) +{ + struct cpuset *parent; + struct domainset *dset; + + parent = cpuset_getbase(set); + /* + * If we are restricting a cpu mask it must be a subset of the + * parent or invalid CPUs have been specified. + */ + if (mask != NULL && !CPU_SUBSET(&parent->cs_mask, mask)) + return (EINVAL); + + /* + * If we are restricting a domain mask it must be a subset of the + * parent or invalid domains have been specified. + */ + dset = parent->cs_domain; + if (domain != NULL && + !DOMAINSET_SUBSET(&dset->ds_mask, &domain->ds_mask)) + return (EINVAL); + + return (0); +} + /* * Create an anonymous set with the provided mask in the space provided by - * 'fset'. If the passed in set is anonymous we use its parent otherwise + * 'nset'. If the passed in set is anonymous we use its parent otherwise * the new set is a child of 'set'. */ static int -cpuset_shadow(struct cpuset *set, struct cpuset *fset, const cpuset_t *mask) +cpuset_shadow(struct cpuset *set, struct cpuset **nsetp, + const cpuset_t *mask, const struct domainset *domain, + struct setlist *cpusets, struct domainlist *domains) { struct cpuset *parent; + struct cpuset *nset; + struct domainset *dset; + struct domainset *d; + int error; - if (set->cs_id == CPUSET_INVALID) - parent = set->cs_parent; + error = cpuset_testshadow(set, mask, domain); + if (error) + return (error); + + parent = cpuset_getbase(set); + dset = parent->cs_domain; + if (mask == NULL) + mask = &set->cs_mask; + if (domain != NULL) + d = domainset_shadow(dset, domain, domains); else - parent = set; - if (!CPU_SUBSET(&parent->cs_mask, mask)) + d = set->cs_domain; + nset = LIST_FIRST(cpusets); + error = _cpuset_create(nset, parent, mask, d, CPUSET_INVALID); + if (error == 0) { + LIST_REMOVE(nset, cs_link); + *nsetp = nset; + } + return (error); +} + +static struct cpuset * +cpuset_update_thread(struct thread *td, struct cpuset *nset) +{ + struct cpuset *tdset; + + tdset = td->td_cpuset; + td->td_cpuset = nset; + td->td_domain.dr_policy = nset->cs_domain; + sched_affinity(td); + + return (tdset); +} + +static int +cpuset_setproc_test_maskthread(struct cpuset *tdset, cpuset_t *mask, + struct domainset *domain) +{ + struct cpuset *parent; + + parent = cpuset_getbase(tdset); + if (mask == NULL) + mask = &tdset->cs_mask; + if (domain == NULL) + domain = tdset->cs_domain; + return cpuset_testshadow(parent, mask, domain); +} + +static int +cpuset_setproc_maskthread(struct cpuset *tdset, cpuset_t *mask, + struct domainset *domain, struct cpuset **nsetp, + struct setlist *freelist, struct domainlist *domainlist) +{ + struct cpuset *parent; + + parent = cpuset_getbase(tdset); + if (mask == NULL) + mask = &tdset->cs_mask; + if (domain == NULL) + domain = tdset->cs_domain; + return cpuset_shadow(parent, nsetp, mask, domain, freelist, + domainlist); +} + +static int +cpuset_setproc_setthread_mask(struct cpuset *tdset, struct cpuset *set, + cpuset_t *mask, struct domainset *domain) +{ + struct cpuset *parent; + + parent = cpuset_getbase(tdset); + + /* + * If the thread restricted its mask then apply that same + * restriction to the new set, otherwise take it wholesale. + */ + if (CPU_CMP(&tdset->cs_mask, &parent->cs_mask) != 0) { + CPU_COPY(&tdset->cs_mask, mask); + CPU_AND(mask, &set->cs_mask); + } else + CPU_COPY(&set->cs_mask, mask); + + /* + * If the thread restricted the domain then we apply the + * restriction to the new set but retain the policy. + */ + if (tdset->cs_domain != parent->cs_domain) { + domainset_copy(tdset->cs_domain, domain); + DOMAINSET_AND(&domain->ds_mask, &set->cs_domain->ds_mask); + } else + domainset_copy(set->cs_domain, domain); + + if (CPU_EMPTY(mask) || DOMAINSET_EMPTY(&domain->ds_mask)) return (EDEADLK); - return (_cpuset_create(fset, parent, mask, CPUSET_INVALID)); + + return (0); } +static int +cpuset_setproc_test_setthread(struct cpuset *tdset, struct cpuset *set) +{ + struct domainset domain; + cpuset_t mask; + + if (tdset->cs_id != CPUSET_INVALID) + return (0); + return cpuset_setproc_setthread_mask(tdset, set, &mask, &domain); +} + +static int +cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set, + struct cpuset **nsetp, struct setlist *freelist, + struct domainlist *domainlist) +{ + struct domainset domain; + cpuset_t mask; + int error; + + /* + * If we're replacing on a thread that has not constrained the + * original set we can simply accept the new set. + */ + if (tdset->cs_id != CPUSET_INVALID) { + *nsetp = cpuset_ref(set); + return (0); + } + error = cpuset_setproc_setthread_mask(tdset, set, &mask, &domain); + if (error) + return (error); + + return cpuset_shadow(tdset, nsetp, &mask, &domain, freelist, + domainlist); +} + /* - * Handle two cases for replacing the base set or mask of an entire process. + * Handle three cases for updating an entire process. * - * 1) Set is non-null and mask is null. This reparents all anonymous sets - * to the provided set and replaces all non-anonymous td_cpusets with the - * provided set. - * 2) Mask is non-null and set is null. This replaces or creates anonymous - * sets for every thread with the existing base as a parent. + * 1) Set is non-null. This reparents all anonymous sets to the provided + * set and replaces all non-anonymous td_cpusets with the provided set. + * 2) Mask is non-null. This replaces or creates anonymous sets for every + * thread with the existing base as a parent. + * 3) domain is non-null. This creates anonymous sets for every thread + * and replaces the domain set. * * This is overly complicated because we can't allocate while holding a * spinlock and spinlocks must be held while changing and examining thread * state. */ static int -cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask) +cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask, + struct domainset *domain) { struct setlist freelist; struct setlist droplist; - struct cpuset *tdset; + struct domainlist domainlist; struct cpuset *nset; struct thread *td; struct proc *p; @@ -533,7 +1034,9 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t * 2) If enough cpusets have not been allocated release the locks and * allocate them. Loop. */ - LIST_INIT(&freelist); + cpuset_freelist_init(&freelist, 1); + domainset_freelist_init(&domainlist, 1); + nfree = 1; LIST_INIT(&droplist); nfree = 0; for (;;) { @@ -544,39 +1047,27 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t break; threads = p->p_numthreads; PROC_UNLOCK(p); - for (; nfree < threads; nfree++) { - nset = uma_zalloc(cpuset_zone, M_WAITOK); - LIST_INSERT_HEAD(&freelist, nset, cs_link); + if (nfree < threads) { + cpuset_freelist_add(&freelist, threads - nfree); + domainset_freelist_add(&domainlist, threads - nfree); + nfree = threads; } } PROC_LOCK_ASSERT(p, MA_OWNED); /* * Now that the appropriate locks are held and we have enough cpusets, - * make sure the operation will succeed before applying changes. The + * make sure the operation will succeed before applying changes. The * proc lock prevents td_cpuset from changing between calls. */ error = 0; FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); - tdset = td->td_cpuset; - /* - * Verify that a new mask doesn't specify cpus outside of - * the set the thread is a member of. - */ - if (mask) { - if (tdset->cs_id == CPUSET_INVALID) - tdset = tdset->cs_parent; - if (!CPU_SUBSET(&tdset->cs_mask, mask)) - error = EDEADLK; - /* - * Verify that a new set won't leave an existing thread - * mask without a cpu to run on. It can, however, restrict - * the set. - */ - } else if (tdset->cs_id == CPUSET_INVALID) { - if (!CPU_OVERLAP(&set->cs_mask, &tdset->cs_mask)) - error = EDEADLK; - } + if (set != NULL) + error = cpuset_setproc_test_setthread(td->td_cpuset, + set); + else + error = cpuset_setproc_test_maskthread(td->td_cpuset, + mask, domain); thread_unlock(td); if (error) goto unlock_out; @@ -588,33 +1079,17 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t */ FOREACH_THREAD_IN_PROC(p, td) { thread_lock(td); - /* - * If we presently have an anonymous set or are applying a - * mask we must create an anonymous shadow set. That is - * either parented to our existing base or the supplied set. - * - * If we have a base set with no anonymous shadow we simply - * replace it outright. - */ - tdset = td->td_cpuset; - if (tdset->cs_id == CPUSET_INVALID || mask) { - nset = LIST_FIRST(&freelist); - LIST_REMOVE(nset, cs_link); - if (mask) - error = cpuset_shadow(tdset, nset, mask); - else - error = _cpuset_create(nset, set, - &tdset->cs_mask, CPUSET_INVALID); - if (error) { - LIST_INSERT_HEAD(&freelist, nset, cs_link); - thread_unlock(td); - break; - } - } else - nset = cpuset_ref(set); - cpuset_rel_defer(&droplist, tdset); - td->td_cpuset = nset; - sched_affinity(td); + if (set != NULL) + error = cpuset_setproc_setthread(td->td_cpuset, set, + &nset, &freelist, &domainlist); + else + error = cpuset_setproc_maskthread(td->td_cpuset, mask, + domain, &nset, &freelist, &domainlist); + if (error) { + thread_unlock(td); + break; + } + cpuset_rel_defer(&droplist, cpuset_update_thread(td, nset)); thread_unlock(td); } unlock_out: @@ -622,10 +1097,8 @@ unlock_out: out: while ((nset = LIST_FIRST(&droplist)) != NULL) cpuset_rel_complete(nset); - while ((nset = LIST_FIRST(&freelist)) != NULL) { - LIST_REMOVE(nset, cs_link); - uma_zfree(cpuset_zone, nset); - } + cpuset_freelist_free(&freelist); + domainset_freelist_free(&domainlist); return (error); } @@ -690,46 +1163,57 @@ cpusetobj_strscan(cpuset_t *set, const char *buf) } /* - * Apply an anonymous mask to a single thread. + * Apply an anonymous mask or a domain to a single thread. */ -int -cpuset_setthread(lwpid_t id, cpuset_t *mask) +static int +_cpuset_setthread(lwpid_t id, cpuset_t *mask, struct domainset *domain) { + struct setlist cpusets; + struct domainlist domainlist; struct cpuset *nset; struct cpuset *set; struct thread *td; struct proc *p; int error; - nset = uma_zalloc(cpuset_zone, M_WAITOK); + cpuset_freelist_init(&cpusets, 1); + domainset_freelist_init(&domainlist, domain != NULL); error = cpuset_which(CPU_WHICH_TID, id, &p, &td, &set); if (error) goto out; set = NULL; thread_lock(td); - error = cpuset_shadow(td->td_cpuset, nset, mask); - if (error == 0) { - set = td->td_cpuset; - td->td_cpuset = nset; - sched_affinity(td); - nset = NULL; - } + error = cpuset_shadow(td->td_cpuset, &nset, mask, domain, + &cpusets, &domainlist); + if (error == 0) + set = cpuset_update_thread(td, nset); thread_unlock(td); PROC_UNLOCK(p); if (set) cpuset_rel(set); out: - if (nset) - uma_zfree(cpuset_zone, nset); + cpuset_freelist_free(&cpusets); + domainset_freelist_free(&domainlist); return (error); } /* + * Apply an anonymous mask to a single thread. + */ +int +cpuset_setthread(lwpid_t id, cpuset_t *mask) +{ + + return _cpuset_setthread(id, mask, NULL); +} + +/* * Apply new cpumask to the ithread. */ int cpuset_setithread(lwpid_t id, int cpu) { + struct setlist cpusets; struct cpuset *nset, *rset; struct cpuset *parent, *old_set; struct thread *td; @@ -738,8 +1222,8 @@ cpuset_setithread(lwpid_t id, int cpu) cpuset_t mask; int error; - nset = uma_zalloc(cpuset_zone, M_WAITOK); - rset = uma_zalloc(cpuset_zone, M_WAITOK); + cpuset_freelist_init(&cpusets, 1); + rset = uma_zalloc(cpuset_zone, M_WAITOK | M_ZERO); cs_id = CPUSET_INVALID; CPU_ZERO(&mask); @@ -756,13 +1240,15 @@ cpuset_setithread(lwpid_t id, int cpu) old_set = td->td_cpuset; if (cpu == NOCPU) { + nset = LIST_FIRST(&cpusets); + LIST_REMOVE(nset, cs_link); /* * roll back to default set. We're not using cpuset_shadow() * here because we can fail CPU_SUBSET() check. This can happen * if default set does not contain all CPUs. */ - error = _cpuset_create(nset, cpuset_default, &mask, + error = _cpuset_create(nset, cpuset_default, &mask, NULL, CPUSET_INVALID); goto applyset; @@ -779,7 +1265,7 @@ cpuset_setithread(lwpid_t id, int cpu) * with any mask. */ error = _cpuset_create(rset, cpuset_zero, - &cpuset_zero->cs_mask, cs_id); + &cpuset_zero->cs_mask, NULL, cs_id); if (error != 0) { PROC_UNLOCK(p); goto out; @@ -794,22 +1280,19 @@ cpuset_setithread(lwpid_t id, int cpu) old_set = NULL; } - error = cpuset_shadow(parent, nset, &mask); + error = cpuset_shadow(parent, &nset, &mask, NULL, &cpusets, NULL); applyset: if (error == 0) { thread_lock(td); *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201712152335.vBFNZKxj049606>