Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 5 Jan 2017 09:16:02 +0000
From:      Steven Hartland <steven@multiplay.co.uk>
To:        Mark Johnston <markj@FreeBSD.org>, src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r311346 - in head/sys: kern sys vm
Message-ID:  <3cf413dd-f8d5-4462-9486-0a56adf08b77@freebsd.org>
In-Reply-To: <201701050144.v051iCso008577@repo.freebsd.org>
References:  <201701050144.v051iCso008577@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
Given the use of the number of CPU's for sizing would this play nice 
with hot plug CPU's?

     Regards
     Steve

On 05/01/2017 01:44, Mark Johnston wrote:
> Author: markj
> Date: Thu Jan  5 01:44:12 2017
> New Revision: 311346
> URL: https://svnweb.freebsd.org/changeset/base/311346
>
> Log:
>    Add a small allocator for exec_map entries.
>    
>    Upon each execve, we allocate a KVA range for use in copying data to the
>    new image. Pages must be faulted into the range, and when the range is
>    freed, the backing pages are freed and their mappings are destroyed. This
>    is a lot of needless overhead, and the exec_map management becomes a
>    bottleneck when many CPUs are executing execve concurrently. Moreover, the
>    number of available ranges is fixed at 16, which is insufficient on large
>    systems and potentially excessive on 32-bit systems.
>    
>    The new allocator reduces overhead by making exec_map allocations
>    persistent. When a range is freed, pages backing the range are marked clean
>    and made easy to reclaim. With this change, the exec_map is sized based on
>    the number of CPUs.
>    
>    Reviewed by:	kib
>    MFC after:	1 month
>    Differential Revision:	https://reviews.freebsd.org/D8921
>
> Modified:
>    head/sys/kern/kern_exec.c
>    head/sys/sys/imgact.h
>    head/sys/vm/vm_init.c
>    head/sys/vm/vm_kern.c
>    head/sys/vm/vm_kern.h
>
> Modified: head/sys/kern/kern_exec.c
> ==============================================================================
> --- head/sys/kern/kern_exec.c	Thu Jan  5 01:28:08 2017	(r311345)
> +++ head/sys/kern/kern_exec.c	Thu Jan  5 01:44:12 2017	(r311346)
> @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
>   #include <sys/kernel.h>
>   #include <sys/lock.h>
>   #include <sys/malloc.h>
> +#include <sys/mman.h>
>   #include <sys/mount.h>
>   #include <sys/mutex.h>
>   #include <sys/namei.h>
> @@ -59,6 +60,7 @@ __FBSDID("$FreeBSD$");
>   #include <sys/sf_buf.h>
>   #include <sys/shm.h>
>   #include <sys/signalvar.h>
> +#include <sys/smp.h>
>   #include <sys/stat.h>
>   #include <sys/syscallsubr.h>
>   #include <sys/sysctl.h>
> @@ -1315,17 +1317,80 @@ err_exit:
>   	return (error);
>   }
>   
> +struct exec_args_kva {
> +	vm_offset_t addr;
> +	SLIST_ENTRY(exec_args_kva) next;
> +};
> +
> +static DPCPU_DEFINE(struct exec_args_kva *, exec_args_kva);
> +
> +static SLIST_HEAD(, exec_args_kva) exec_args_kva_freelist;
> +static struct mtx exec_args_kva_mtx;
> +
> +static void
> +exec_prealloc_args_kva(void *arg __unused)
> +{
> +	struct exec_args_kva *argkva;
> +	u_int i;
> +
> +	SLIST_INIT(&exec_args_kva_freelist);
> +	mtx_init(&exec_args_kva_mtx, "exec args kva", NULL, MTX_DEF);
> +	for (i = 0; i < exec_map_entries; i++) {
> +		argkva = malloc(sizeof(*argkva), M_PARGS, M_WAITOK);
> +		argkva->addr = kmap_alloc_wait(exec_map, exec_map_entry_size);
> +		SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
> +	}
> +}
> +SYSINIT(exec_args_kva, SI_SUB_EXEC, SI_ORDER_ANY, exec_prealloc_args_kva, NULL);
> +
> +static vm_offset_t
> +exec_alloc_args_kva(void **cookie)
> +{
> +	struct exec_args_kva *argkva;
> +
> +	argkva = (void *)atomic_readandclear_ptr(
> +	    (uintptr_t *)DPCPU_PTR(exec_args_kva));
> +	if (argkva == NULL) {
> +		mtx_lock(&exec_args_kva_mtx);
> +		while ((argkva = SLIST_FIRST(&exec_args_kva_freelist)) == NULL)
> +			(void)mtx_sleep(&exec_args_kva_freelist,
> +			    &exec_args_kva_mtx, 0, "execkva", 0);
> +		SLIST_REMOVE_HEAD(&exec_args_kva_freelist, next);
> +		mtx_unlock(&exec_args_kva_mtx);
> +	}
> +	*(struct exec_args_kva **)cookie = argkva;
> +	return (argkva->addr);
> +}
> +
> +static void
> +exec_free_args_kva(void *cookie)
> +{
> +	struct exec_args_kva *argkva;
> +	vm_offset_t base;
> +
> +	argkva = cookie;
> +	base = argkva->addr;
> +
> +	vm_map_madvise(exec_map, base, base + exec_map_entry_size, MADV_FREE);
> +	if (!atomic_cmpset_ptr((uintptr_t *)DPCPU_PTR(exec_args_kva),
> +	    (uintptr_t)NULL, (uintptr_t)argkva)) {
> +		mtx_lock(&exec_args_kva_mtx);
> +		SLIST_INSERT_HEAD(&exec_args_kva_freelist, argkva, next);
> +		wakeup_one(&exec_args_kva_freelist);
> +		mtx_unlock(&exec_args_kva_mtx);
> +	}
> +}
> +
>   /*
>    * Allocate temporary demand-paged, zero-filled memory for the file name,
> - * argument, and environment strings.  Returns zero if the allocation succeeds
> - * and ENOMEM otherwise.
> + * argument, and environment strings.
>    */
>   int
>   exec_alloc_args(struct image_args *args)
>   {
>   
> -	args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
> -	return (args->buf != NULL ? 0 : ENOMEM);
> +	args->buf = (char *)exec_alloc_args_kva(&args->bufkva);
> +	return (0);
>   }
>   
>   void
> @@ -1333,8 +1398,7 @@ exec_free_args(struct image_args *args)
>   {
>   
>   	if (args->buf != NULL) {
> -		kmap_free_wakeup(exec_map, (vm_offset_t)args->buf,
> -		    PATH_MAX + ARG_MAX);
> +		exec_free_args_kva(args->bufkva);
>   		args->buf = NULL;
>   	}
>   	if (args->fname_buf != NULL) {
>
> Modified: head/sys/sys/imgact.h
> ==============================================================================
> --- head/sys/sys/imgact.h	Thu Jan  5 01:28:08 2017	(r311345)
> +++ head/sys/sys/imgact.h	Thu Jan  5 01:44:12 2017	(r311346)
> @@ -42,6 +42,7 @@ struct ucred;
>   
>   struct image_args {
>   	char *buf;		/* pointer to string buffer */
> +	void *bufkva;		/* cookie for string buffer KVA */
>   	char *begin_argv;	/* beginning of argv in buf */
>   	char *begin_envv;	/* beginning of envv in buf */
>   	char *endp;		/* current `end' pointer of arg & env strings */
>
> Modified: head/sys/vm/vm_init.c
> ==============================================================================
> --- head/sys/vm/vm_init.c	Thu Jan  5 01:28:08 2017	(r311345)
> +++ head/sys/vm/vm_init.c	Thu Jan  5 01:44:12 2017	(r311346)
> @@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$");
>   
>   #include <sys/param.h>
>   #include <sys/kernel.h>
> +#include <sys/libkern.h>
>   #include <sys/lock.h>
>   #include <sys/proc.h>
>   #include <sys/rwlock.h>
> @@ -91,10 +92,6 @@ __FBSDID("$FreeBSD$");
>   
>   long physmem;
>   
> -static int exec_map_entries = 16;
> -SYSCTL_INT(_vm, OID_AUTO, exec_map_entries, CTLFLAG_RDTUN, &exec_map_entries, 0,
> -    "Maximum number of simultaneous execs");
> -
>   /*
>    * System initialization
>    */
> @@ -271,10 +268,19 @@ again:
>   		panic("Clean map calculation incorrect");
>   
>   	/*
> - 	 * Allocate the pageable submaps.
> +	 * Allocate the pageable submaps.  We may cache an exec map entry per
> +	 * CPU, so we therefore need to reserve space for at least ncpu+1
> +	 * entries to avoid deadlock.  The exec map is also used by some image
> +	 * activators, so we leave a fixed number of pages for their use.
>   	 */
> +#ifdef __LP64__
> +	exec_map_entries = 8 * mp_ncpus;
> +#else
> +	exec_map_entries = min(8 * mp_ncpus, 2 * mp_ncpus + 4);
> +#endif
> +	exec_map_entry_size = round_page(PATH_MAX + ARG_MAX);
>   	exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
> -	    exec_map_entries * round_page(PATH_MAX + ARG_MAX), FALSE);
> +	    exec_map_entries * exec_map_entry_size + 64 * PAGE_SIZE, FALSE);
>   	pipe_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, maxpipekva,
>   	    FALSE);
>   }
>
> Modified: head/sys/vm/vm_kern.c
> ==============================================================================
> --- head/sys/vm/vm_kern.c	Thu Jan  5 01:28:08 2017	(r311345)
> +++ head/sys/vm/vm_kern.c	Thu Jan  5 01:44:12 2017	(r311346)
> @@ -97,6 +97,9 @@ CTASSERT((ZERO_REGION_SIZE & PAGE_MASK)
>   /* NB: Used by kernel debuggers. */
>   const u_long vm_maxuser_address = VM_MAXUSER_ADDRESS;
>   
> +u_int exec_map_entry_size;
> +u_int exec_map_entries;
> +
>   SYSCTL_ULONG(_vm, OID_AUTO, min_kernel_address, CTLFLAG_RD,
>       SYSCTL_NULL_ULONG_PTR, VM_MIN_KERNEL_ADDRESS, "Min kernel address");
>   
>
> Modified: head/sys/vm/vm_kern.h
> ==============================================================================
> --- head/sys/vm/vm_kern.h	Thu Jan  5 01:28:08 2017	(r311345)
> +++ head/sys/vm/vm_kern.h	Thu Jan  5 01:44:12 2017	(r311346)
> @@ -61,7 +61,7 @@
>    */
>   
>   #ifndef _VM_VM_KERN_H_
> -#define _VM_VM_KERN_H_ 1
> +#define	_VM_VM_KERN_H_
>   
>   /* Kernel memory management definitions. */
>   extern vm_map_t kernel_map;
> @@ -74,5 +74,7 @@ extern struct vmem *transient_arena;
>   extern struct vmem *memguard_arena;
>   extern vm_offset_t swapbkva;
>   extern u_long vm_kmem_size;
> +extern u_int exec_map_entries;
> +extern u_int exec_map_entry_size;
>   
> -#endif				/* _VM_VM_KERN_H_ */
> +#endif /* _VM_VM_KERN_H_ */
>




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?3cf413dd-f8d5-4462-9486-0a56adf08b77>