Date: Sat, 8 Jul 2017 01:56:48 +0000 (UTC)
From: Mark Johnston <markj@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject: svn commit: r320797 - in stable/11/sys: kern sys vm
Message-ID: <201707080156.v681umTn027198@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: markj Date: Sat Jul 8 01:56:48 2017 New Revision: 320797 URL: https://svnweb.freebsd.org/changeset/base/320797 Log: MFC r311346, r311352, r313756: Add an allocator for KVA for execve arguments. Modified: stable/11/sys/kern/kern_exec.c stable/11/sys/sys/imgact.h stable/11/sys/vm/vm_init.c stable/11/sys/vm/vm_kern.c stable/11/sys/vm/vm_kern.h Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/kern/kern_exec.c ============================================================================== --- stable/11/sys/kern/kern_exec.c Fri Jul 7 22:00:39 2017 (r320796) +++ stable/11/sys/kern/kern_exec.c Sat Jul 8 01:56:48 2017 (r320797) @@ -50,6 +50,11 @@ __FBSDID("$FreeBSD$"); #include <sys/imgact_elf.h> #include <sys/wait.h> #include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/namei.h> +#include <sys/pioctl.h> #include <sys/priv.h> #include <sys/proc.h> #include <sys/pioctl.h> @@ -63,6 +68,10 @@ __FBSDID("$FreeBSD$"); #include <sys/syscallsubr.h> #include <sys/sysent.h> #include <sys/shm.h> +#include <sys/signalvar.h> +#include <sys/smp.h> +#include <sys/stat.h> +#include <sys/syscallsubr.h> #include <sys/sysctl.h> #include <sys/vnode.h> #include <sys/stat.h> @@ -1315,17 +1324,124 @@ err_exit: return (error); } +struct exec_args_kva { + vm_offset_t addr; + u_int gen; + SLIST_ENTRY(exec_args_kva) next; +}; + +static DPCPU_DEFINE(struct exec_args_kva *, exec_args_kva); + +static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist; +static struct mtx exec_args_kva_mtx; +static u_int exec_args_gen; + +static void +exec_prealloc_args_kva(void *arg __unused) +{ + struct exec_args_kva *argkva; + u_int i; + + SLIST_INIT(&exec_args_kva_freelist); + mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF); + for (i = 0; i < exec_map_entries; i++) { + argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK); + argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size); + argkva->gen = exec_args_gen; + 
SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); + } +} +SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL); + +static vm_offset_t +exec_alloc_args_kva(void **cookie) +{ + struct exec_args_kva *argkva; + + argkva = (void *)atomic_readandclear_ptr( + (uintptr_t *)DPCPU_PTR(exec_args_kva)); + if (argkva == NULL) { + mtx_lock(&exec_args_kva_mtx); + while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL) + (void)mtx_sleep(&exec_args_kva_freelist, + &exec_args_kva_mtx, 0, "execkva", 0); + SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next); + mtx_unlock(&exec_args_kva_mtx); + } + *(struct exec_args_kva **)cookie = argkva; + return (argkva->addr); +} + +static void +exec_release_args_kva(struct exec_args_kva *argkva, u_int gen) +{ + vm_offset_t base; + + base = argkva->addr; + if (argkva->gen != gen) { + vm_map_madvise(exec_map, base, base + exec_map_entry_size, + MADV_FREE); + argkva->gen = gen; + } + if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva), + (uintptr_t)NULL, (uintptr_t)argkva)) { + mtx_lock(&exec_args_kva_mtx); + SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next); + wakeup_one(&exec_args_kva_freelist); + mtx_unlock(&exec_args_kva_mtx); + } +} + +static void +exec_free_args_kva(void *cookie) +{ + + exec_release_args_kva(cookie, exec_args_gen); +} + +static void +exec_args_kva_lowmem(void *arg __unused) +{ + SLIST_HEAD(, exec_args_kva) head; + struct exec_args_kva *argkva; + u_int gen; + int i; + + gen = atomic_fetchadd_int(&exec_args_gen, 1) + 1; + + /* + * Force an madvise of each KVA range. Any currently allocated ranges + * will have MADV_FREE applied once they are freed. 
+ */ + SLIST_INIT(&head); + mtx_lock(&exec_args_kva_mtx); + SLIST_SWAP(&head, &exec_args_kva_freelist, exec_args_kva); + mtx_unlock(&exec_args_kva_mtx); + while ((argkva = SLIST_FIRST(&head)) != NULL) { + SLIST_REMOVE_HEAD(&head, next); + exec_release_args_kva(argkva, gen); + } + + CPU_FOREACH(i) { + argkva = (void *)atomic_readandclear_ptr( + (uintptr_t *)DPCPU_ID_PTR(i, exec_args_kva)); + if (argkva != NULL) + exec_release_args_kva(argkva, gen); + } +} +EVENTHANDLER_DEFINE(vm_lowmem, exec_args_kva_lowmem, NULL, + EVENTHANDLER_PRI_ANY); + /* * Allocate temporary demand-paged, zero-filled memory for the file name, - * argument, and environment strings. Returns zero if the allocation succeeds - * and ENOMEM otherwise. + * argument, and environment strings. */ int exec_alloc_args(struct image_args *args) { - args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX); - return (args->buf != NULL ? 0 : ENOMEM); + args->buf = (char *)exec_alloc_args_kva(&args->bufkva); + return (0); } void @@ -1333,8 +1449,7 @@ exec_free_args(struct image_args *args) { if (args->buf != NULL) { - kmap_free_wakeup(exec_map, (vm_offset_t)args->buf, - PATH_MAX + ARG_MAX); + exec_free_args_kva(args->bufkva); args->buf = NULL; } if (args->fname_buf != NULL) { Modified: stable/11/sys/sys/imgact.h ============================================================================== --- stable/11/sys/sys/imgact.h Fri Jul 7 22:00:39 2017 (r320796) +++ stable/11/sys/sys/imgact.h Sat Jul 8 01:56:48 2017 (r320797) @@ -42,6 +42,7 @@ struct ucred; struct image_args { char *buf; /* pointer to string buffer */ + void *bufkva; /* cookie for string buffer KVA */ char *begin_argv; /* beginning of argv in buf */ char *begin_envv; /* beginning of envv in buf */ char *endp; /* current `end' pointer of arg & env strings */ Modified: stable/11/sys/vm/vm_init.c ============================================================================== --- stable/11/sys/vm/vm_init.c Fri Jul 7 22:00:39 2017 (r320796) +++ 
stable/11/sys/vm/vm_init.c Sat Jul 8 01:56:48 2017 (r320797) @@ -91,10 +91,6 @@ __FBSDID("$FreeBSD$"); long physmem; -static int exec_map_entries = 16; -SYSCTL_INT(_vm, OID_AUTO, exec_map_entries, CTLFLAG_RDTUN, &exec_map_entries, 0, - "Maximum number of simultaneous execs"); - /* * System initialization */ @@ -261,10 +257,19 @@ again: panic("Clean map calculation incorrect"); /* - * Allocate the pageable submaps. + * Allocate the pageable submaps. We may cache an exec map entry per + * CPU, so we therefore need to reserve space for at least ncpu+1 + * entries to avoid deadlock. The exec map is also used by some image + * activators, so we leave a fixed number of pages for their use. */ +#ifdef __LP64__ + exec_map_entries = 8 * mp_ncpus; +#else + exec_map_entries = 2 * mp_ncpus + 4; +#endif + exec_map_entry_size = round_page(PATH_MAX + ARG_MAX); exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, - exec_map_entries * round_page(PATH_MAX + ARG_MAX), FALSE); + exec_map_entries * exec_map_entry_size + 64 * PAGE_SIZE, FALSE); pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, maxpipekva, FALSE); } Modified: stable/11/sys/vm/vm_kern.c ============================================================================== --- stable/11/sys/vm/vm_kern.c Fri Jul 7 22:00:39 2017 (r320796) +++ stable/11/sys/vm/vm_kern.c Sat Jul 8 01:56:48 2017 (r320797) @@ -97,6 +97,9 @@ CTASSERT((ZERO_REGION_SIZE & PAGE_MASK) == 0); /* NB: Used by kernel debuggers. 
*/ const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS; +u_int exec_map_entry_size; +u_int exec_map_entries; + SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD, SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address"); Modified: stable/11/sys/vm/vm_kern.h ============================================================================== --- stable/11/sys/vm/vm_kern.h Fri Jul 7 22:00:39 2017 (r320796) +++ stable/11/sys/vm/vm_kern.h Sat Jul 8 01:56:48 2017 (r320797) @@ -61,7 +61,7 @@ */ #ifndef _VM_VM_KERN_H_ -#define _VM_VM_KERN_H_ 1 +#define _VM_VM_KERN_H_ /* Kernel memory management definitions. */ extern vm_map_t kernel_map; @@ -74,5 +74,7 @@ extern struct vmem *transient_arena; extern struct vmem *memguard_arena; extern vm_offset_t swapbkva; extern u_long vm_kmem_size; +extern u_int exec_map_entries; +extern u_int exec_map_entry_size; -#endif /* _VM_VM_KERN_H_ */ +#endif /* _VM_VM_KERN_H_ */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201707080156.v681umTn027198>