Date: Sun, 1 Apr 2018 04:11:38 +0000 (UTC) From: Jeff Roberson <jeff@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r331861 - in user/jeff/numa: lib/libc/sys sys/compat/freebsd32 sys/kern sys/sys sys/vm Message-ID: <201804010411.w314Bc5p043555@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jeff Date: Sun Apr 1 04:11:38 2018 New Revision: 331861 URL: https://svnweb.freebsd.org/changeset/base/331861 Log: Experimental support for msetdomain() a syscall similar to linux's mbind() that allows you to set NUMA policy on memory ranges. Modified: user/jeff/numa/lib/libc/sys/Symbol.map user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c user/jeff/numa/sys/compat/freebsd32/syscalls.master user/jeff/numa/sys/kern/init_sysent.c user/jeff/numa/sys/kern/kern_cpuset.c user/jeff/numa/sys/kern/syscalls.c user/jeff/numa/sys/kern/syscalls.master user/jeff/numa/sys/kern/systrace_args.c user/jeff/numa/sys/sys/domainset.h user/jeff/numa/sys/sys/syscall.h user/jeff/numa/sys/sys/syscall.mk user/jeff/numa/sys/sys/syscallsubr.h user/jeff/numa/sys/sys/sysproto.h user/jeff/numa/sys/vm/vm_fault.c user/jeff/numa/sys/vm/vm_map.c user/jeff/numa/sys/vm/vm_map.h user/jeff/numa/sys/vm/vm_object.c Modified: user/jeff/numa/lib/libc/sys/Symbol.map ============================================================================== --- user/jeff/numa/lib/libc/sys/Symbol.map Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/lib/libc/sys/Symbol.map Sun Apr 1 04:11:38 2018 (r331861) @@ -401,6 +401,7 @@ FBSD_1.5 { statfs; cpuset_getdomain; cpuset_setdomain; + msetdomain; }; FBSDprivate_1.0 { @@ -1029,4 +1030,6 @@ FBSDprivate_1.0 { __sys_cpuset_getdomain; _cpuset_setdomain; __sys_cpuset_setdomain; + _msetdomain; + __msetdomain; }; Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h ============================================================================== --- user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/compat/freebsd32/freebsd32_syscall.h Sun Apr 1 04:11:38 2018 (r331861) @@ -469,4 +469,5 @@ #define 
FREEBSD32_SYS_freebsd32_cpuset_getdomain 561 #define FREEBSD32_SYS_freebsd32_cpuset_setdomain 562 #define FREEBSD32_SYS_getrandom 563 -#define FREEBSD32_SYS_MAXSYSCALL 564 +#define FREEBSD32_SYS_msetdomain 564 +#define FREEBSD32_SYS_MAXSYSCALL 565 Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c ============================================================================== --- user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/compat/freebsd32/freebsd32_syscalls.c Sun Apr 1 04:11:38 2018 (r331861) @@ -596,4 +596,5 @@ const char *freebsd32_syscallnames[] = { "freebsd32_cpuset_getdomain", /* 561 = freebsd32_cpuset_getdomain */ "freebsd32_cpuset_setdomain", /* 562 = freebsd32_cpuset_setdomain */ "getrandom", /* 563 = getrandom */ + "msetdomain", /* 564 = msetdomain */ }; Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c ============================================================================== --- user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/compat/freebsd32/freebsd32_sysent.c Sun Apr 1 04:11:38 2018 (r331861) @@ -645,4 +645,5 @@ struct sysent freebsd32_sysent[] = { { AS(freebsd32_cpuset_getdomain_args), (sy_call_t *)freebsd32_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 561 = freebsd32_cpuset_getdomain */ { AS(freebsd32_cpuset_setdomain_args), (sy_call_t *)freebsd32_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 562 = freebsd32_cpuset_setdomain */ { AS(getrandom_args), (sy_call_t *)sys_getrandom, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 563 = getrandom */ + { AS(msetdomain_args), (sy_call_t *)sys_msetdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 564 = msetdomain */ }; Modified: user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c ============================================================================== --- 
user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/compat/freebsd32/freebsd32_systrace_args.c Sun Apr 1 04:11:38 2018 (r331861) @@ -3283,6 +3283,18 @@ systrace_args(int sysnum, void *params, uint64_t *uarg *n_args = 3; break; } + /* msetdomain */ + case 564: { + struct msetdomain_args *p = params; + uarg[0] = (intptr_t) p->addr; /* void * */ + uarg[1] = p->size; /* size_t */ + uarg[2] = p->domainsetsize; /* size_t */ + uarg[3] = (intptr_t) p->mask; /* domainset_t * */ + iarg[4] = p->policy; /* int */ + iarg[5] = p->flags; /* int */ + *n_args = 6; + break; + } default: *n_args = 0; break; @@ -8825,6 +8837,31 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *d break; }; break; + /* msetdomain */ + case 564: + switch(ndx) { + case 0: + p = "userland void *"; + break; + case 1: + p = "size_t"; + break; + case 2: + p = "size_t"; + break; + case 3: + p = "userland domainset_t *"; + break; + case 4: + p = "int"; + break; + case 5: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -10678,6 +10715,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char * break; /* getrandom */ case 563: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* msetdomain */ + case 564: if (ndx == 0 || ndx == 1) p = "int"; break; Modified: user/jeff/numa/sys/compat/freebsd32/syscalls.master ============================================================================== --- user/jeff/numa/sys/compat/freebsd32/syscalls.master Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/compat/freebsd32/syscalls.master Sun Apr 1 04:11:38 2018 (r331861) @@ -1118,5 +1118,9 @@ int policy); } 563 AUE_NULL NOPROTO { int getrandom(void *buf, size_t buflen, \ unsigned int flags); } +564 AUE_NULL NOPROTO { int msetdomain(void *addr, \ + size_t size, size_t domainsetsize, \ + domainset_t *mask, int policy, \ + int flags); } ; vim: syntax=off Modified: user/jeff/numa/sys/kern/init_sysent.c 
============================================================================== --- user/jeff/numa/sys/kern/init_sysent.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/kern/init_sysent.c Sun Apr 1 04:11:38 2018 (r331861) @@ -615,4 +615,5 @@ struct sysent sysent[] = { { AS(cpuset_getdomain_args), (sy_call_t *)sys_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 561 = cpuset_getdomain */ { AS(cpuset_setdomain_args), (sy_call_t *)sys_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 562 = cpuset_setdomain */ { AS(getrandom_args), (sy_call_t *)sys_getrandom, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 563 = getrandom */ + { AS(msetdomain_args), (sy_call_t *)sys_msetdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 564 = msetdomain */ }; Modified: user/jeff/numa/sys/kern/kern_cpuset.c ============================================================================== --- user/jeff/numa/sys/kern/kern_cpuset.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/kern/kern_cpuset.c Sun Apr 1 04:11:38 2018 (r331861) @@ -64,6 +64,9 @@ __FBSDID("$FreeBSD$"); #include <vm/uma.h> #include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> #include <vm/vm_object.h> #include <vm/vm_extern.h> @@ -2005,6 +2008,57 @@ out: return (error); } +static int +domainset_copyin(struct domainset *domain, size_t domainsetsize, + const domainset_t *maskp, int policy) +{ + domainset_t *mask; + char *end, *cp; + int error; + + if (domainsetsize < sizeof(domainset_t) || + domainsetsize > DOMAINSET_MAXSIZE / NBBY) + return (ERANGE); + + if (policy <= DOMAINSET_POLICY_INVALID || + policy > DOMAINSET_POLICY_MAX) + return (EINVAL); + + memset(domain, 0, sizeof(*domain)); + mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); + error = copyin(maskp, mask, domainsetsize); + if (error != 0) + goto out; + /* + * Verify that no high bits are set. 
+ */ + if (domainsetsize > sizeof(domainset_t)) { + end = cp = (char *)&mask->__bits; + end += domainsetsize; + cp += sizeof(domainset_t); + while (cp != end) + if (*cp++ != 0) { + error = EINVAL; + goto out; + } + + } + DOMAINSET_COPY(mask, &domain->ds_mask); + domain->ds_policy = policy; + /* Translate preferred policy into a mask and fallback. */ + if (policy == DOMAINSET_POLICY_PREFER) { + /* Only support a single preferred domain. */ + if (DOMAINSET_COUNT(&domain->ds_mask) != 1) { + error = EINVAL; + goto out; + } + domain->ds_prefer = DOMAINSET_FFS(&domain->ds_mask) - 1; + } +out: + free(mask, M_TEMP); + return (error); +} + #ifndef _SYS_SYSPROTO_H_ struct cpuset_setdomain_args { cpulevel_t level; @@ -2015,6 +2069,7 @@ struct cpuset_setdomain_args { int policy; }; #endif + int sys_cpuset_setdomain(struct thread *td, struct cpuset_setdomain_args *uap) { @@ -2032,12 +2087,8 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le struct thread *ttd; struct proc *p; struct domainset domain; - domainset_t *mask; int error; - if (domainsetsize < sizeof(domainset_t) || - domainsetsize > DOMAINSET_MAXSIZE / NBBY) - return (ERANGE); /* In Capability mode, you can only set your own CPU set. */ if (IN_CAPABILITY_MODE(td)) { if (level != CPU_LEVEL_WHICH) @@ -2047,43 +2098,13 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le if (id != -1) return (ECAPMODE); } - memset(&domain, 0, sizeof(domain)); - mask = malloc(domainsetsize, M_TEMP, M_WAITOK | M_ZERO); - error = copyin(maskp, mask, domainsetsize); - if (error) - goto out; - /* - * Verify that no high bits are set. 
- */ - if (domainsetsize > sizeof(domainset_t)) { - char *end; - char *cp; - end = cp = (char *)&mask->__bits; - end += domainsetsize; - cp += sizeof(domainset_t); - while (cp != end) - if (*cp++ != 0) { - error = EINVAL; - goto out; - } - - } - DOMAINSET_COPY(mask, &domain.ds_mask); - domain.ds_policy = policy; - if (policy <= DOMAINSET_POLICY_INVALID || - policy > DOMAINSET_POLICY_MAX) - return (EINVAL); - - /* Translate preferred policy into a mask and fallback. */ - if (policy == DOMAINSET_POLICY_PREFER) { - /* Only support a single preferred domain. */ - if (DOMAINSET_COUNT(&domain.ds_mask) != 1) - return (EINVAL); - domain.ds_prefer = DOMAINSET_FFS(&domain.ds_mask) - 1; - /* This will be constrained by domainset_shadow(). */ + error = domainset_copyin(&domain, domainsetsize, maskp, policy); + if (error) + return (error); + /* This will be constrained by cpuset_shadow(). */ + if (policy == DOMAINSET_POLICY_PREFER) DOMAINSET_FILL(&domain.ds_mask); - } switch (level) { case CPU_LEVEL_ROOT: @@ -2146,12 +2167,106 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t le break; } out: - free(mask, M_TEMP); return (error); } -#ifdef DDB +#ifndef _SYS_SYSPROTO_H_ +struct msetdomain_args { + void *addr; + size_t size; + size_t domainsetsize; + domainset_t *mask; + int policy; + int flags; +}; +#endif +int +sys_msetdomain(struct thread *td, struct msetdomain_args *uap) +{ + return (kern_msetdomain(td, (uintptr_t)uap->addr, uap->size, + uap->domainsetsize, uap->mask, uap->policy, uap->flags)); +} + +int +kern_msetdomain(struct thread *td, uintptr_t addr0, size_t size, + size_t domainsetsize, const domainset_t *mask, int policy, int flags) +{ + struct domainset domain, *set, *nset; + struct cpuset *cset; + struct thread *ttd; + struct proc *p; + vm_offset_t addr; + vm_size_t pageoff; + int error; + + /* Normalize the addresses. 
*/ + addr = trunc_page(addr0); + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vm_size_t)round_page(size); + if (addr + size < addr) + return (EINVAL); + + /* Short-circuit for POLICY_INVALID == reset to default. */ + if (policy == DOMAINSET_POLICY_INVALID) { + nset = NULL; + goto apply; + } + + /* + * Copy in and initialize the domainset from the user arguments. + */ + error = domainset_copyin(&domain, domainsetsize, mask, policy); + if (error) + return (error); + + /* + * Grab the list of allowed domains from the numbered cpuset this + * process is a member of. + */ + error = cpuset_which(CPU_WHICH_PID, -1, &p, &ttd, &cset); + if (error) + return (error); + thread_lock(ttd); + set = cpuset_getbase(ttd->td_cpuset)->cs_domain; + thread_unlock(ttd); + PROC_UNLOCK(p); + + /* + * Validate the new policy against the allowed set. + */ + if (policy == DOMAINSET_POLICY_PREFER) + DOMAINSET_COPY(&set->ds_mask, &domain.ds_mask); + if (!domainset_valid(set, &domain)) + return (EINVAL); + + /* + * Attempt to create a new set based on this key. + */ + nset = domainset_create(&domain); + if (nset == NULL) + return (EINVAL); + + /* + * Attempt to apply the new set to the memory range. 
+ */ +apply: + switch (vm_map_setdomain(&td->td_proc->p_vmspace->vm_map, addr, + addr + size, nset, flags)) { + case KERN_SUCCESS: + break; + case KERN_INVALID_ADDRESS: + return (EFAULT); + default: + return (EINVAL); + } + + return (0); +} + +#ifdef DDB static void ddb_display_bitset(const struct bitset *set, int size) { Modified: user/jeff/numa/sys/kern/syscalls.c ============================================================================== --- user/jeff/numa/sys/kern/syscalls.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/kern/syscalls.c Sun Apr 1 04:11:38 2018 (r331861) @@ -570,4 +570,5 @@ const char *syscallnames[] = { "cpuset_getdomain", /* 561 = cpuset_getdomain */ "cpuset_setdomain", /* 562 = cpuset_setdomain */ "getrandom", /* 563 = getrandom */ + "msetdomain", /* 564 = msetdomain */ }; Modified: user/jeff/numa/sys/kern/syscalls.master ============================================================================== --- user/jeff/numa/sys/kern/syscalls.master Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/kern/syscalls.master Sun Apr 1 04:11:38 2018 (r331861) @@ -1023,6 +1023,9 @@ int policy); } 563 AUE_NULL STD { int getrandom(void *buf, size_t buflen, \ unsigned int flags); } +564 AUE_NULL STD { int msetdomain(void *addr, size_t size, \ + size_t domainsetsize, domainset_t *mask, \ + int policy, int flags); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master Modified: user/jeff/numa/sys/kern/systrace_args.c ============================================================================== --- user/jeff/numa/sys/kern/systrace_args.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/kern/systrace_args.c Sun Apr 1 04:11:38 2018 (r331861) @@ -3291,6 +3291,18 @@ systrace_args(int sysnum, void *params, uint64_t *uarg *n_args = 3; break; } + /* msetdomain */ + case 564: { + struct msetdomain_args *p = params; + uarg[0] = (intptr_t) p->addr; /* void * */ + uarg[1] = 
p->size; /* size_t */ + uarg[2] = p->domainsetsize; /* size_t */ + uarg[3] = (intptr_t) p->mask; /* domainset_t * */ + iarg[4] = p->policy; /* int */ + iarg[5] = p->flags; /* int */ + *n_args = 6; + break; + } default: *n_args = 0; break; @@ -8777,6 +8789,31 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *d break; }; break; + /* msetdomain */ + case 564: + switch(ndx) { + case 0: + p = "userland void *"; + break; + case 1: + p = "size_t"; + break; + case 2: + p = "size_t"; + break; + case 3: + p = "userland domainset_t *"; + break; + case 4: + p = "int"; + break; + case 5: + p = "int"; + break; + default: + break; + }; + break; default: break; }; @@ -10665,6 +10702,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char * break; /* getrandom */ case 563: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* msetdomain */ + case 564: if (ndx == 0 || ndx == 1) p = "int"; break; Modified: user/jeff/numa/sys/sys/domainset.h ============================================================================== --- user/jeff/numa/sys/sys/domainset.h Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/sys/domainset.h Sun Apr 1 04:11:38 2018 (r331861) @@ -114,6 +114,7 @@ int cpuset_getdomain(cpulevel_t, cpuwhich_t, id_t, siz int *); int cpuset_setdomain(cpulevel_t, cpuwhich_t, id_t, size_t, const domainset_t *, int); +int msetdomain(void *, size_t, size_t, domainset_t *, int, int); __END_DECLS #endif Modified: user/jeff/numa/sys/sys/syscall.h ============================================================================== --- user/jeff/numa/sys/sys/syscall.h Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/sys/syscall.h Sun Apr 1 04:11:38 2018 (r331861) @@ -479,4 +479,5 @@ #define SYS_cpuset_getdomain 561 #define SYS_cpuset_setdomain 562 #define SYS_getrandom 563 -#define SYS_MAXSYSCALL 564 +#define SYS_msetdomain 564 +#define SYS_MAXSYSCALL 565 Modified: user/jeff/numa/sys/sys/syscall.mk 
============================================================================== --- user/jeff/numa/sys/sys/syscall.mk Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/sys/syscall.mk Sun Apr 1 04:11:38 2018 (r331861) @@ -406,4 +406,5 @@ MIASM = \ kevent.o \ cpuset_getdomain.o \ cpuset_setdomain.o \ - getrandom.o + getrandom.o \ + msetdomain.o Modified: user/jeff/numa/sys/sys/syscallsubr.h ============================================================================== --- user/jeff/numa/sys/sys/syscallsubr.h Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/sys/syscallsubr.h Sun Apr 1 04:11:38 2018 (r331861) @@ -175,6 +175,9 @@ int kern_mlock(struct proc *proc, struct ucred *cred, int kern_mmap(struct thread *td, uintptr_t addr, size_t size, int prot, int flags, int fd, off_t pos); int kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot); +int kern_msetdomain(struct thread *td, uintptr_t addr, + size_t size, size_t domainsetsize, const domainset_t *maskp, + int policy, int flags); int kern_msgctl(struct thread *, int, int, struct msqid_ds *); int kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *); int kern_msgsnd(struct thread *, int, const void *, size_t, int, long); Modified: user/jeff/numa/sys/sys/sysproto.h ============================================================================== --- user/jeff/numa/sys/sys/sysproto.h Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/sys/sysproto.h Sun Apr 1 04:11:38 2018 (r331861) @@ -1773,6 +1773,14 @@ struct getrandom_args { char buflen_l_[PADL_(size_t)]; size_t buflen; char buflen_r_[PADR_(size_t)]; char flags_l_[PADL_(unsigned int)]; unsigned int flags; char flags_r_[PADR_(unsigned int)]; }; +struct msetdomain_args { + char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)]; + char size_l_[PADL_(size_t)]; size_t size; char size_r_[PADR_(size_t)]; + char domainsetsize_l_[PADL_(size_t)]; size_t domainsetsize; char domainsetsize_r_[PADR_(size_t)]; + char 
mask_l_[PADL_(domainset_t *)]; domainset_t * mask; char mask_r_[PADR_(domainset_t *)]; + char policy_l_[PADL_(int)]; int policy; char policy_r_[PADR_(int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; +}; int nosys(struct thread *, struct nosys_args *); void sys_sys_exit(struct thread *, struct sys_exit_args *); int sys_fork(struct thread *, struct fork_args *); @@ -2154,6 +2162,7 @@ int sys_kevent(struct thread *, struct kevent_args *); int sys_cpuset_getdomain(struct thread *, struct cpuset_getdomain_args *); int sys_cpuset_setdomain(struct thread *, struct cpuset_setdomain_args *); int sys_getrandom(struct thread *, struct getrandom_args *); +int sys_msetdomain(struct thread *, struct msetdomain_args *); #ifdef COMPAT_43 @@ -3047,6 +3056,7 @@ int freebsd11_mknodat(struct thread *, struct freebsd1 #define SYS_AUE_cpuset_getdomain AUE_NULL #define SYS_AUE_cpuset_setdomain AUE_NULL #define SYS_AUE_getrandom AUE_NULL +#define SYS_AUE_msetdomain AUE_NULL #undef PAD_ #undef PADL_ Modified: user/jeff/numa/sys/vm/vm_fault.c ============================================================================== --- user/jeff/numa/sys/vm/vm_fault.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/vm/vm_fault.c Sun Apr 1 04:11:38 2018 (r331861) @@ -1609,7 +1609,6 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map KASSERT(upgrade || dst_entry->object.vm_object == NULL, ("vm_fault_copy_entry: vm_object not NULL")); if (src_object != dst_object) { - dst_object->domain = src_object->domain; dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; dst_object->charge = dst_entry->end - dst_entry->start; Modified: user/jeff/numa/sys/vm/vm_map.c ============================================================================== --- user/jeff/numa/sys/vm/vm_map.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/vm/vm_map.c Sun Apr 1 04:11:38 2018 (r331861) @@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> 
+#include <sys/domainset.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> @@ -848,6 +849,34 @@ vm_map_entry_create(vm_map_t map) } /* + * vm_map_entry_object_allocate: [ internal use only ] + * + * Returns the object associated with a map entry, allocating + * a default object if none presently exists. + */ +static vm_object_t +vm_map_entry_object_allocate(vm_map_t map, vm_map_entry_t entry) +{ + vm_object_t object; + + VM_MAP_ASSERT_LOCKED(map); + if (entry->object.vm_object != NULL) + return (entry->object.vm_object); + + object = vm_object_allocate(OBJT_DEFAULT, + atop(entry->end - entry->start)); + entry->object.vm_object = object; + entry->offset = 0; + if (entry->cred != NULL) { + object->cred = entry->cred; + object->charge = entry->end - entry->start; + entry->cred = NULL; + } + + return (object); +} + +/* * vm_map_entry_set_behavior: * * Set the expected access behavior, either normal, random, or @@ -1773,16 +1802,7 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, */ if (entry->object.vm_object == NULL && !map->system_map && (entry->eflags & MAP_ENTRY_GUARD) == 0) { - vm_object_t object; - object = vm_object_allocate(OBJT_DEFAULT, - atop(entry->end - entry->start)); - entry->object.vm_object = object; - entry->offset = 0; - if (entry->cred != NULL) { - object->cred = entry->cred; - object->charge = entry->end - entry->start; - entry->cred = NULL; - } + vm_map_entry_object_allocate(map, entry); } else if (entry->object.vm_object != NULL && ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && entry->cred != NULL) { @@ -1853,16 +1873,7 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, v */ if (entry->object.vm_object == NULL && !map->system_map && (entry->eflags & MAP_ENTRY_GUARD) == 0) { - vm_object_t object; - object = vm_object_allocate(OBJT_DEFAULT, - atop(entry->end - entry->start)); - entry->object.vm_object = object; - entry->offset = 0; - if (entry->cred != NULL) { - object->cred = entry->cred; - object->charge = 
entry->end - entry->start; - entry->cred = NULL; - } + vm_map_entry_object_allocate(map, entry); } else if (entry->object.vm_object != NULL && ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && entry->cred != NULL) { @@ -3449,21 +3460,11 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c case VM_INHERIT_SHARE: /* - * Clone the entry, creating the shared object if necessary. + * Clone the entry, creating the shared object if + * necessary. */ - object = old_entry->object.vm_object; - if (object == NULL) { - object = vm_object_allocate(OBJT_DEFAULT, - atop(old_entry->end - old_entry->start)); - old_entry->object.vm_object = object; - old_entry->offset = 0; - if (old_entry->cred != NULL) { - object->cred = old_entry->cred; - object->charge = old_entry->end - - old_entry->start; - old_entry->cred = NULL; - } - } + object = vm_map_entry_object_allocate(old_map, + old_entry); /* * Add the reference before calling vm_object_shadow @@ -4195,16 +4196,7 @@ RetryLookupLocked: !map->system_map) { if (vm_map_lock_upgrade(map)) goto RetryLookup; - entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, - atop(size)); - entry->offset = 0; - if (entry->cred != NULL) { - VM_OBJECT_WLOCK(entry->object.vm_object); - entry->object.vm_object->cred = entry->cred; - entry->object.vm_object->charge = size; - VM_OBJECT_WUNLOCK(entry->object.vm_object); - entry->cred = NULL; - } + vm_map_entry_object_allocate(map, entry); vm_map_lock_downgrade(map); } @@ -4313,6 +4305,107 @@ vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry) * Unlock the main-level map */ vm_map_unlock_read(map); +} + +/* + * vm_map_setdomain: + * + * Assigns the NUMA policy contained in 'domain' to all objects + * overlapping the requested address range. 
+ */ +int +vm_map_setdomain(vm_map_t map, vm_offset_t start, vm_offset_t end, + struct domainset *domain, int flags) +{ + vm_map_entry_t current, entry; + vm_object_t object; + int error; + + error = KERN_SUCCESS; + vm_map_lock(map); + if (start < vm_map_min(map) || end > vm_map_max(map) || + start >= end || map->system_map) { + error = KERN_INVALID_ADDRESS; + goto out; + } + + /* + * Locate starting entry and clip if necessary. + */ + if (!vm_map_lookup_entry(map, start, &entry)) { + error = KERN_INVALID_ADDRESS; + goto out; + } + if (entry->start > start) { + error = KERN_INVALID_ADDRESS; + goto out; + } + vm_map_clip_start(map, entry, start); + + /* + * Walk the range looking for holes before we apply policy. + */ + for (current = entry; + (current != &map->header) && (current->start < end); + current = current->next + ) { + if (current->end >= end) + break; + /* We don't support gaps. */ + if (current->end != current->next->start) { + error = KERN_INVALID_ADDRESS; + goto out; + } + } + + /* + * Walk each overlapping map entry and update the backing + * object's memory policy. + */ + for (current = entry; + (current != &map->header) && (current->start < end); + current = current->next + ) { + /* Skip incompatible entries. */ + if ((current->eflags & + (MAP_ENTRY_GUARD | MAP_ENTRY_IS_SUB_MAP)) != 0) + continue; + + /* + * Clip the end and allocate the object so that we are + * only modifying the requested range. + */ + vm_map_clip_end(map, current, end); + object = vm_map_entry_object_allocate(map, current); + if (current->eflags & MAP_ENTRY_NEEDS_COPY) { + vm_object_shadow(&current->object.vm_object, + &current->offset, current->end - current->start); + current->eflags &= ~MAP_ENTRY_NEEDS_COPY; + object = current->object.vm_object; + } + + /* + * If the object is anonymous memory we need to split it + * so that we can apply the unique allocation property to + * this range. 
+ */ + VM_OBJECT_WLOCK(object); + if (object->type == OBJT_DEFAULT || + object->type == OBJT_SWAP) { + vm_object_collapse(object); + if ((object->flags & OBJ_NOSPLIT) == 0) { + vm_object_split(current); + object = current->object.vm_object; + } + } + object->domain.dr_policy = domain; + VM_OBJECT_WUNLOCK(object); + vm_map_simplify_entry(map, current); + } +out: + vm_map_unlock(map); + + return (error); } #include "opt_ddb.h" Modified: user/jeff/numa/sys/vm/vm_map.h ============================================================================== --- user/jeff/numa/sys/vm/vm_map.h Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/vm/vm_map.h Sun Apr 1 04:11:38 2018 (r331861) @@ -403,5 +403,8 @@ int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_ int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags); long vmspace_swap_count(struct vmspace *vmspace); +struct domainset; +int vm_map_setdomain(vm_map_t, vm_offset_t, vm_offset_t, + struct domainset *, int); #endif /* _KERNEL */ #endif /* _VM_MAP_ */ Modified: user/jeff/numa/sys/vm/vm_object.c ============================================================================== --- user/jeff/numa/sys/vm/vm_object.c Sun Apr 1 01:21:00 2018 (r331860) +++ user/jeff/numa/sys/vm/vm_object.c Sun Apr 1 04:11:38 2018 (r331861) @@ -1328,7 +1328,6 @@ vm_object_shadow( result->backing_object_offset = *offset; if (source != NULL) { VM_OBJECT_WLOCK(source); - result->domain = source->domain; LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list); source->shadow_count++; #if VM_NRESERVLEVEL > 0
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201804010411.w314Bc5p043555>