From: Jeff Roberson <jeff@FreeBSD.org>
Date: Sun, 19 Nov 2017 03:18:29 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r325992 - in user/jeff/numa/sys: kern vm

Author: jeff
Date: Sun Nov 19 03:18:29 2017
New Revision: 325992
URL: https://svnweb.freebsd.org/changeset/base/325992

Log:
  Use a per-domain kernel vmem arena to ensure that reservations and
  domain-specific allocations are honored for kernel memory.  Provide
  domain iterators for kmem along with domain-specific allocation
  functions.
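All of the kmem_* wrappers introduced by this commit share one retry
pattern: the caller's M_WAITOK is stripped so that each per-domain attempt
is non-blocking, and sleeping is only re-enabled once the iterator reaches
its last domain.  A minimal sketch of that pattern, with the hypothetical
my_alloc_domain() standing in for kmem_malloc_domain() and friends:

    /*
     * Sketch of the domain-iterator retry pattern used by the kmem_*
     * wrappers below.  my_alloc_domain() is a hypothetical stand-in
     * for the per-domain allocators.
     */
    static vm_offset_t
    my_alloc(vm_size_t size, int flags)
    {
        struct vm_domain_iterator vi;
        vm_offset_t addr;
        int domain, wait;

        addr = 0;
        vm_policy_iterator_init(&vi);
        /* Remember whether the caller may sleep, then force M_NOWAIT. */
        wait = flags & M_WAITOK;
        flags &= ~M_WAITOK;
        flags |= M_NOWAIT;
        while (vm_domain_iterator_run(&vi, &domain) == 0) {
            /* Only the last domain visited is allowed to block. */
            if (vm_domain_iterator_isdone(&vi) && wait) {
                flags |= wait;
                flags &= ~M_NOWAIT;
            }
            addr = my_alloc_domain(domain, size, flags);
            if (addr != 0)
                break;
        }
        vm_policy_iterator_finish(&vi);
        return (addr);
    }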
Modified:
  user/jeff/numa/sys/kern/subr_vmem.c
  user/jeff/numa/sys/vm/vm_extern.h
  user/jeff/numa/sys/vm/vm_init.c
  user/jeff/numa/sys/vm/vm_kern.c
  user/jeff/numa/sys/vm/vm_page.h

Modified: user/jeff/numa/sys/kern/subr_vmem.c
==============================================================================
--- user/jeff/numa/sys/kern/subr_vmem.c  Sun Nov 19 03:14:10 2017  (r325991)
+++ user/jeff/numa/sys/kern/subr_vmem.c  Sun Nov 19 03:18:29 2017  (r325992)
@@ -184,6 +184,7 @@ static struct task vmem_periodic_wk;
 static struct mtx_padalign __exclusive_cache_line vmem_list_lock;
 static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
+static uma_zone_t vmem_zone;

 /* ---- misc */
 #define VMEM_CONDVAR_INIT(vm, wchan)    cv_init(&vm->vm_cv, wchan)
@@ -655,6 +656,9 @@ vmem_startup(void)
 {

     mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
+    vmem_zone = uma_zcreate("vmem",
+        sizeof(struct vmem), NULL, NULL, NULL, NULL,
+        UMA_ALIGN_PTR, UMA_ZONE_VM);
     vmem_bt_zone = uma_zcreate("vmem btag",
         sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
         UMA_ALIGN_PTR, UMA_ZONE_VM);
@@ -824,7 +828,7 @@ vmem_destroy1(vmem_t *vm)

     VMEM_CONDVAR_DESTROY(vm);
     VMEM_LOCK_DESTROY(vm);
-    free(vm, M_VMEM);
+    uma_zfree(vmem_zone, vm);
 }

 static int
@@ -1056,7 +1060,7 @@ vmem_create(const char *name, vmem_addr_t base, vmem_s

     vmem_t *vm;

-    vm = malloc(sizeof(*vm), M_VMEM, flags & (M_WAITOK|M_NOWAIT));
+    vm = uma_zalloc(vmem_zone, flags & (M_WAITOK|M_NOWAIT));
     if (vm == NULL)
         return (NULL);
     if (vmem_init(vm, name, base, size, quantum, qcache_max,

Modified: user/jeff/numa/sys/vm/vm_extern.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_extern.h  Sun Nov 19 03:14:10 2017  (r325991)
+++ user/jeff/numa/sys/vm/vm_extern.h  Sun Nov 19 03:18:29 2017  (r325992)
@@ -54,14 +54,21 @@ void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t
 /* These operate on virtual addresses backed by memory. */
 vm_offset_t kmem_alloc_attr(struct vmem *, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
+vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags,
+    vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr);
 vm_offset_t kmem_alloc_contig(struct vmem *, vm_size_t size, int flags,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr);
+vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr);
 vm_offset_t kmem_malloc(struct vmem *, vm_size_t size, int flags);
+vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags);
 void kmem_free(struct vmem *, vm_offset_t, vm_size_t);

 /* This provides memory for previously allocated address space. */
 int kmem_back(vm_object_t, vm_offset_t, vm_size_t, int);
+int kmem_back_domain(int, vm_object_t, vm_offset_t, vm_size_t, int);
 void kmem_unback(vm_object_t, vm_offset_t, vm_size_t);

 /* Bootstrapping. */
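As a usage note (not part of the commit): with the prototypes above, a
caller can either target one domain directly or keep using the
iterator-backed kernel_arena entry points; freeing always goes through
kmem_free(), which looks up the owning domain itself.  A hypothetical
caller:

    /* Hypothetical: try domain 1 without sleeping, then fall back. */
    vm_offset_t va;

    va = kmem_malloc_domain(1, PAGE_SIZE, M_NOWAIT);
    if (va == 0)
        va = kmem_malloc(kernel_arena, PAGE_SIZE, M_WAITOK);
    /* ... use the memory ... */
    kmem_free(kernel_arena, va, PAGE_SIZE);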
Modified: user/jeff/numa/sys/vm/vm_init.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_init.c  Sun Nov 19 03:14:10 2017  (r325991)
+++ user/jeff/numa/sys/vm/vm_init.c  Sun Nov 19 03:18:29 2017  (r325992)
@@ -79,16 +79,25 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
+
+#if VM_NRESERVLEVEL > 0
+#define KVA_QUANTUM 1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT)
+#else
+    /* On non-superpage architectures want large import sizes. */
+#define KVA_QUANTUM PAGE_SIZE * 1024
+#endif

 long physmem;

 /*
@@ -128,6 +137,7 @@ static void
 vm_mem_init(dummy)
     void *dummy;
 {
+    int domain;

     /*
      * Initializes resident memory structures. From here on, all physical
@@ -148,13 +158,15 @@ vm_mem_init(dummy)
      * Initialize the kernel_arena. This can grow on demand.
      */
     vmem_init(kernel_arena, "kernel arena", 0, 0, PAGE_SIZE, 0, 0);
-    vmem_set_import(kernel_arena, kva_import, NULL, NULL,
-#if VM_NRESERVLEVEL > 0
-        1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT));
-#else
-        /* On non-superpage architectures want large import sizes. */
-        PAGE_SIZE * 1024);
-#endif
+    vmem_set_import(kernel_arena, kva_import, NULL, NULL, KVA_QUANTUM);
+
+    for (domain = 0; domain < vm_ndomains; domain++) {
+        vm_dom[domain].vmd_kernel_arena = vmem_create(
+            "kernel arena domain", 0, 0, PAGE_SIZE, 0, M_WAITOK);
+        vmem_set_import(vm_dom[domain].vmd_kernel_arena,
+            (vmem_import_t *)vmem_alloc, NULL, kernel_arena,
+            KVA_QUANTUM);
+    }

     kmem_init_zero_region();
     pmap_init();
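The arrangement above gives each per-domain arena kernel_arena as its
parent: the domain arena imports KVA_QUANTUM-sized spans through
vmem_alloc(), and kernel_arena in turn imports from the kernel map via
kva_import().  A sketch of an equivalent hand-written import callback
(my_import is hypothetical; the commit simply casts vmem_alloc itself to
vmem_import_t):

    /*
     * Hypothetical vmem import callback equivalent to the cast above:
     * pull a span out of the parent arena passed in as 'arg'.
     */
    static int
    my_import(void *arg, vmem_size_t size, int flags, vmem_addr_t *addrp)
    {

        return (vmem_alloc(arg, size, flags, addrp));
    }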
Modified: user/jeff/numa/sys/vm/vm_kern.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_kern.c  Sun Nov 19 03:14:10 2017  (r325991)
+++ user/jeff/numa/sys/vm/vm_kern.c  Sun Nov 19 03:18:29 2017  (r325992)
@@ -75,8 +75,10 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include

 #include
+#include
 #include
 #include
 #include
@@ -84,6 +86,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -159,17 +162,17 @@ kva_free(vm_offset_t addr, vm_size_t size)
  * given flags, then the pages are zeroed before they are mapped.
  */
 vm_offset_t
-kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
+kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, vm_memattr_t memattr)
 {
+    vmem_t *vmem;
     vm_object_t object = kernel_object;
     vm_offset_t addr, i, offset;
     vm_page_t m;
     int pflags, tries;

-    KASSERT(vmem == kernel_arena,
-        ("kmem_alloc_attr: Only kernel_arena is supported."));
     size = round_page(size);
+    vmem = vm_dom[domain].vmd_kernel_arena;
     if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
         return (0);
     offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -207,6 +210,36 @@ retry:
     return (addr);
 }

+vm_offset_t
+kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, vm_memattr_t memattr)
+{
+    struct vm_domain_iterator vi;
+    vm_offset_t addr;
+    int domain, wait;
+
+    KASSERT(vmem == kernel_arena,
+        ("kmem_alloc_attr: Only kernel_arena is supported."));
+    addr = 0;
+    vm_policy_iterator_init(&vi);
+    wait = flags & M_WAITOK;
+    flags &= ~M_WAITOK;
+    flags |= M_NOWAIT;
+    while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
+        if (vm_domain_iterator_isdone(&vi) && wait) {
+            flags |= wait;
+            flags &= ~M_NOWAIT;
+        }
+        addr = kmem_alloc_attr_domain(domain, size, flags, low, high,
+            memattr);
+        if (addr != 0)
+            break;
+    }
+    vm_policy_iterator_finish(&vi);
+
+    return (addr);
+}
+
 /*
  * Allocates a region from the kernel address map and physically
  * contiguous pages within the specified address range to the kernel
  *
@@ -216,19 +249,19 @@ retry:
  * mapped.
  */
 vm_offset_t
-kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
+kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
     vm_memattr_t memattr)
 {
+    vmem_t *vmem;
     vm_object_t object = kernel_object;
     vm_offset_t addr, offset, tmp;
     vm_page_t end_m, m;
     u_long npages;
     int pflags, tries;

-    KASSERT(vmem == kernel_arena,
-        ("kmem_alloc_contig: Only kernel_arena is supported."));
     size = round_page(size);
+    vmem = vm_dom[domain].vmd_kernel_arena;
     if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
         return (0);
     offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -268,6 +301,37 @@ retry:
     return (addr);
 }

+vm_offset_t
+kmem_alloc_contig(struct vmem *vmem, vm_size_t size, int flags, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr)
+{
+    struct vm_domain_iterator vi;
+    vm_offset_t addr;
+    int domain, wait;
+
+    KASSERT(vmem == kernel_arena,
+        ("kmem_alloc_contig: Only kernel_arena is supported."));
+    addr = 0;
+    vm_policy_iterator_init(&vi);
+    wait = flags & M_WAITOK;
+    flags &= ~M_WAITOK;
+    flags |= M_NOWAIT;
+    while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
+        if (vm_domain_iterator_isdone(&vi) && wait) {
+            flags |= wait;
+            flags &= ~M_NOWAIT;
+        }
+        addr = kmem_alloc_contig_domain(domain, size, flags, low, high,
+            alignment, boundary, memattr);
+        if (addr != 0)
+            break;
+    }
+    vm_policy_iterator_finish(&vi);
+
+    return (addr);
+}
+
 /*
  * kmem_suballoc:
  *
@@ -311,18 +375,18 @@ kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_of
  *    Allocate wired-down pages in the kernel's address space.
  */
 vm_offset_t
-kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
+kmem_malloc_domain(int domain, vm_size_t size, int flags)
 {
+    vmem_t *vmem;
     vm_offset_t addr;
     int rv;

-    KASSERT(vmem == kernel_arena,
-        ("kmem_malloc: Only kernel_arena is supported."));
+    vmem = vm_dom[domain].vmd_kernel_arena;
     size = round_page(size);
     if (vmem_alloc(vmem, size, flags | M_BESTFIT, &addr))
         return (0);

-    rv = kmem_back(kernel_object, addr, size, flags);
+    rv = kmem_back_domain(domain, kernel_object, addr, size, flags);
     if (rv != KERN_SUCCESS) {
         vmem_free(vmem, addr, size);
         return (0);
@@ -330,20 +394,49 @@ kmem_malloc(struct vmem *vmem, vm_size_t size, int fla
     return (addr);
 }

+vm_offset_t
+kmem_malloc(struct vmem *vmem, vm_size_t size, int flags)
+{
+    struct vm_domain_iterator vi;
+    vm_offset_t addr;
+    int domain, wait;
+
+    KASSERT(vmem == kernel_arena,
+        ("kmem_malloc: Only kernel_arena is supported."));
+    addr = 0;
+    vm_policy_iterator_init(&vi);
+    wait = flags & M_WAITOK;
+    flags &= ~M_WAITOK;
+    flags |= M_NOWAIT;
+    while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
+        if (vm_domain_iterator_isdone(&vi) && wait) {
+            flags |= wait;
+            flags &= ~M_NOWAIT;
+        }
+        addr = kmem_malloc_domain(domain, size, flags);
+        if (addr != 0)
+            break;
+    }
+    vm_policy_iterator_finish(&vi);
+
+    return (addr);
+}
+
 /*
  * kmem_back:
  *
  *    Allocate physical pages for the specified virtual address range.
  */
 int
-kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
+kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
+    vm_size_t size, int flags)
 {
     vm_offset_t offset, i;
     vm_page_t m, mpred;
     int pflags;

     KASSERT(object == kernel_object,
-        ("kmem_back: only supports kernel object."));
+        ("kmem_back_domain: only supports kernel object."));

     offset = addr - VM_MIN_KERNEL_ADDRESS;
     pflags = malloc2vm_flags(flags) | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED;
@@ -356,8 +449,8 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_siz
 retry:
     mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i));
     for (; i < size; i += PAGE_SIZE, mpred = m) {
-        m = vm_page_alloc_after(object, atop(offset + i), pflags,
-            mpred);
+        m = vm_page_alloc_domain_after(object, atop(offset + i),
+            domain, pflags, mpred);

         /*
          * Ran out of space, free everything up and return. Don't need
@@ -384,6 +477,33 @@ retry:
     return (KERN_SUCCESS);
 }

+int
+kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
+{
+    struct vm_domain_iterator vi;
+    int domain, wait, ret;
+
+    KASSERT(object == kernel_object,
+        ("kmem_back: only supports kernel object."));
+    ret = 0;
+    vm_policy_iterator_init(&vi);
+    wait = flags & M_WAITOK;
+    flags &= ~M_WAITOK;
+    flags |= M_NOWAIT;
+    while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
+        if (vm_domain_iterator_isdone(&vi) && wait) {
+            flags |= wait;
+            flags &= ~M_NOWAIT;
+        }
+        ret = kmem_back_domain(domain, object, addr, size, flags);
+        if (ret == KERN_SUCCESS)
+            break;
+    }
+    vm_policy_iterator_finish(&vi);
+
+    return (ret);
+}
+
 /*
  * kmem_unback:
  *
@@ -393,11 +513,12 @@ retry:
  *    A physical page must exist within the specified object at each index
  *    that is being unmapped.
  */
-void
-kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
+static int
+_kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
     vm_page_t m, next;
     vm_offset_t end, offset;
+    int domain;

     KASSERT(object == kernel_object,
         ("kmem_unback: only supports kernel object."));
@@ -406,15 +527,25 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_s
     offset = addr - VM_MIN_KERNEL_ADDRESS;
     end = offset + size;
     VM_OBJECT_WLOCK(object);
-    for (m = vm_page_lookup(object, atop(offset)); offset < end;
-        offset += PAGE_SIZE, m = next) {
+    m = vm_page_lookup(object, atop(offset));
+    domain = vm_phys_domidx(m);
+    for (; offset < end; offset += PAGE_SIZE, m = next) {
         next = vm_page_next(m);
         vm_page_unwire(m, PQ_NONE);
         vm_page_free(m);
     }
     VM_OBJECT_WUNLOCK(object);
+
+    return domain;
 }

+void
+kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
+{
+
+    _kmem_unback(object, addr, size);
+}
+
 /*
  * kmem_free:
  *
@@ -424,12 +555,13 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_s
 void
 kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size)
 {
+    int domain;

     KASSERT(vmem == kernel_arena,
         ("kmem_free: Only kernel_arena is supported."));

     size = round_page(size);
-    kmem_unback(kernel_object, addr, size);
-    vmem_free(vmem, addr, size);
+    domain = _kmem_unback(kernel_object, addr, size);
+    vmem_free(vm_dom[domain].vmd_kernel_arena, addr, size);
 }

 /*

Modified: user/jeff/numa/sys/vm/vm_page.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_page.h  Sun Nov 19 03:14:10 2017  (r325991)
+++ user/jeff/numa/sys/vm/vm_page.h  Sun Nov 19 03:18:29 2017  (r325992)
@@ -227,6 +227,7 @@ struct vm_pagequeue {

 struct vm_domain {
     struct vm_pagequeue vmd_pagequeues[PQ_COUNT];
+    struct vmem *vmd_kernel_arena;
     u_int vmd_page_count;
     u_int vmd_free_count;
     long vmd_segs;    /* bitmask of the segments */
From: Jeff Roberson <jeff@FreeBSD.org>
Date: Mon, 20 Nov 2017 02:26:20 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r326002 - user/jeff/numa/sys/vm

Author: jeff
Date: Mon Nov 20 02:26:20 2017
New Revision: 326002
URL: https://svnweb.freebsd.org/changeset/base/326002

Log:
  Update for r325992

  Not all files that include vm_phys.h also include opt_vm.h, which gives
  them different definitions of vm_phys_domain().  Add some asserts to
  guarantee that we're getting the domain we expect in a few cases that
  can otherwise cause crashes later.
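The hazard being fixed is worth spelling out: vm_phys.h used to define
vm_phys_domidx() differently depending on whether VM_NUMA_ALLOC was
visible, so a translation unit that forgot to include opt_vm.h silently
compiled the stub.  A schematic of the before state (not the literal
header text):

    /* Before: an option-dependent inline in a header (schematic). */
    #ifdef VM_NUMA_ALLOC        /* made visible by opt_vm.h */
    static inline int
    vm_phys_domidx(vm_page_t m)
    {
        /* ... the real segment lookup ... */
    }
    #else
    static inline int
    vm_phys_domidx(vm_page_t m)
    {
        return (0);    /* what a file missing opt_vm.h silently got */
    }
    #endif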
Modified:
  user/jeff/numa/sys/vm/vm_init.c
  user/jeff/numa/sys/vm/vm_kern.c
  user/jeff/numa/sys/vm/vm_phys.h
  user/jeff/numa/sys/vm/vm_reserv.c

Modified: user/jeff/numa/sys/vm/vm_init.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_init.c  Sun Nov 19 20:18:21 2017  (r326001)
+++ user/jeff/numa/sys/vm/vm_init.c  Mon Nov 20 02:26:20 2017  (r326002)
@@ -93,10 +93,10 @@ __FBSDID("$FreeBSD$");

 #if VM_NRESERVLEVEL > 0
-#define KVA_QUANTUM 1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT)
+#define KVA_QUANTUM (1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT))
 #else
     /* On non-superpage architectures want large import sizes. */
-#define KVA_QUANTUM PAGE_SIZE * 1024
+#define KVA_QUANTUM (PAGE_SIZE * 1024)
 #endif

 long physmem;
@@ -114,7 +114,10 @@ kva_import(void *unused, vmem_size_t size, int flags,
 {
     vm_offset_t addr;
     int result;
-
+
+    KASSERT((size % KVA_QUANTUM) == 0,
+        ("kva_import: Size %jd is not a multiple of %u",
+        size, KVA_QUANTUM));
     addr = vm_map_min(kernel_map);
     result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
         VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);

Modified: user/jeff/numa/sys/vm/vm_kern.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_kern.c  Sun Nov 19 20:18:21 2017  (r326001)
+++ user/jeff/numa/sys/vm/vm_kern.c  Mon Nov 20 02:26:20 2017  (r326002)
@@ -65,6 +65,8 @@
 #include
 __FBSDID("$FreeBSD$");

+#include "opt_vm.h"
+
 #include
 #include
 #include    /* for ticks and hz */
@@ -464,6 +466,9 @@ retry:
             kmem_unback(object, addr, i);
             return (KERN_NO_SPACE);
         }
+        KASSERT(vm_phys_domidx(m) == domain,
+            ("kmem_back_domain: Domain mismatch %d != %d",
+            vm_phys_domidx(m), domain));
         if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
             pmap_zero_page(m);
         KASSERT((m->oflags & VPO_UNMANAGED) != 0,

Modified: user/jeff/numa/sys/vm/vm_phys.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_phys.h  Sun Nov 19 20:18:21 2017  (r326001)
+++ user/jeff/numa/sys/vm/vm_phys.h  Mon Nov 20 02:26:20 2017  (r326002)
@@ -100,7 +100,6 @@ int vm_phys_mem_affinity(int f, int t);
 static inline int
 vm_phys_domidx(vm_page_t m)
 {
-#ifdef VM_NUMA_ALLOC
     int domn, segind;

     /* XXXKIB try to assert that the page is managed */
@@ -109,9 +108,6 @@ vm_phys_domidx(vm_page_t m)
     domn = vm_phys_segs[segind].domain;
     KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
     return (domn);
-#else
-    return (0);
-#endif
 }

 /*

Modified: user/jeff/numa/sys/vm/vm_reserv.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_reserv.c  Sun Nov 19 20:18:21 2017  (r326001)
+++ user/jeff/numa/sys/vm/vm_reserv.c  Mon Nov 20 02:26:20 2017  (r326002)
@@ -713,7 +713,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
     LIST_INSERT_HEAD(&object->rvq, rv, objq);
     rv->object = object;
     rv->pindex = first;
-    rv->domain = vm_phys_domidx(m);
+    rv->domain = domain;
     KASSERT(rv->popcnt == 0,
         ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
     KASSERT(!rv->inpartpopq,
@@ -732,6 +732,8 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
 found:
     index = VM_RESERV_INDEX(object, pindex);
     m = &rv->pages[index];
+    KASSERT(object != kernel_object || vm_phys_domidx(m) == domain,
+        ("vm_reserv_alloc_page: Domain mismatch from reservation."));
     /* Handle vm_page_rename(m, new_object, ...). */
     if (popmap_is_set(rv->popmap, index))
         return (NULL);

From: Jeff Roberson <jeff@FreeBSD.org>
Date: Mon, 20 Nov 2017 03:40:50 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r326003 - user/jeff/numa/sys/vm

Author: jeff
Date: Mon Nov 20 03:40:50 2017
New Revision: 326003
URL: https://svnweb.freebsd.org/changeset/base/326003

Log:
  Update for r325992

  Use a cast so the format specifier works on all architectures.
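For context, vmem_size_t does not have one fixed width across the
architectures FreeBSD supports, so no single printf length modifier fits
it directly; casting to intmax_t and printing with %jd is the standard C
idiom the commit applies.  A self-contained illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustration: print a type whose width varies by architecture. */
    static void
    print_size(size_t size)    /* stand-in for vmem_size_t */
    {

        printf("size %jd\n", (intmax_t)size);
    }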
Modified:
  user/jeff/numa/sys/vm/vm_init.c

Modified: user/jeff/numa/sys/vm/vm_init.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_init.c  Mon Nov 20 02:26:20 2017  (r326002)
+++ user/jeff/numa/sys/vm/vm_init.c  Mon Nov 20 03:40:50 2017  (r326003)
@@ -117,7 +117,7 @@ kva_import(void *unused, vmem_size_t size, int flags,

     KASSERT((size % KVA_QUANTUM) == 0,
         ("kva_import: Size %jd is not a multiple of %u",
-        size, KVA_QUANTUM));
+        (intmax_t)size, KVA_QUANTUM));
     addr = vm_map_min(kernel_map);
     result = vm_map_find(kernel_map, NULL, 0, &addr, size, 0,
         VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);

From: Jeff Roberson <jeff@FreeBSD.org>
Date: Tue, 21 Nov 2017 21:16:48 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r326076 - user/jeff/numa/sys/vm

Author: jeff
Date: Tue Nov 21 21:16:48 2017
New Revision: 326076
URL: https://svnweb.freebsd.org/changeset/base/326076

Log:
  Update for r325992

  Just return if the caller specifies a zero size.  This can happen when
  kmem_back fails to allocate the first page.
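The zero-size call comes from kmem_back_domain()'s failure path, which
unmaps only the pages it managed to back before the failure; when the very
first page allocation fails, that count is still zero.  A sketch of the
relevant shape from r325992/r326002:

    /*
     * Sketch of kmem_back_domain()'s failure path: 'i' bytes were
     * backed before the failure, so i == 0 if the first page failed.
     */
    if (m == NULL) {
        kmem_unback(object, addr, i);
        return (KERN_NO_SPACE);
    }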
Modified:
  user/jeff/numa/sys/vm/vm_kern.c

Modified: user/jeff/numa/sys/vm/vm_kern.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_kern.c  Tue Nov 21 20:46:21 2017  (r326075)
+++ user/jeff/numa/sys/vm/vm_kern.c  Tue Nov 21 21:16:48 2017  (r326076)
@@ -528,6 +528,8 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_
     KASSERT(object == kernel_object,
         ("kmem_unback: only supports kernel object."));

+    if (size == 0)
+        return 0;
     pmap_remove(kernel_pmap, addr, addr + size);
     offset = addr - VM_MIN_KERNEL_ADDRESS;
     end = offset + size;

From: Jeff Roberson <jeff@FreeBSD.org>
Date: Tue, 21 Nov 2017 21:18:04 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r326077 - in user/jeff/numa: lib/libmemstat sys/amd64/amd64 sys/arm64/arm64 sys/i386/i386 sys/kern sys/mips/mips sys/powerpc/aim sys/powerpc/powerpc sys/riscv/riscv sys/sparc64/sparc64 ...

Author: jeff
Date: Tue Nov 21 21:18:04 2017
New Revision: 326077
URL: https://svnweb.freebsd.org/changeset/base/326077

Log:
  NUMAify UMA

  This is a slightly refactored version from the original projects/numa
  branch with better iterators and cleaner integration with the
  vm_page_alloc and kmem_*alloc layers.
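The central interface change here is that every backend page allocator
(the uma_alloc typedef) now receives the preferred NUMA domain.  A minimal
sketch of a custom backend against the new signature, modeled on
page_alloc() in the diff (my_backend_alloc is hypothetical):

    /* Hypothetical domain-aware UMA backend (new uma_alloc shape). */
    static void *
    my_backend_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
        uint8_t *pflag, int wait)
    {

        /* Tell UMA this memory came from the kernel arena/object. */
        *pflag = UMA_SLAB_KERNEL;
        return ((void *)kmem_malloc_domain(domain, bytes, wait));
    }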
Modified: user/jeff/numa/lib/libmemstat/memstat_uma.c user/jeff/numa/sys/amd64/amd64/uma_machdep.c user/jeff/numa/sys/arm64/arm64/uma_machdep.c user/jeff/numa/sys/i386/i386/pmap.c user/jeff/numa/sys/kern/kern_mbuf.c user/jeff/numa/sys/kern/subr_busdma_bufalloc.c user/jeff/numa/sys/kern/subr_vmem.c user/jeff/numa/sys/kern/vfs_bio.c user/jeff/numa/sys/mips/mips/uma_machdep.c user/jeff/numa/sys/powerpc/aim/mmu_oea64.c user/jeff/numa/sys/powerpc/aim/slb.c user/jeff/numa/sys/powerpc/powerpc/uma_machdep.c user/jeff/numa/sys/riscv/riscv/uma_machdep.c user/jeff/numa/sys/sparc64/sparc64/vm_machdep.c user/jeff/numa/sys/sys/busdma_bufalloc.h user/jeff/numa/sys/vm/uma.h user/jeff/numa/sys/vm/uma_core.c user/jeff/numa/sys/vm/uma_int.h Modified: user/jeff/numa/lib/libmemstat/memstat_uma.c ============================================================================== --- user/jeff/numa/lib/libmemstat/memstat_uma.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/lib/libmemstat/memstat_uma.c Tue Nov 21 21:18:04 2017 (r326077) @@ -53,6 +53,8 @@ static struct nlist namelist[] = { { .n_name = "_mp_maxid" }, #define X_ALL_CPUS 2 { .n_name = "_all_cpus" }, +#define X_VM_NDOMAINS 3 + { .n_name = "_vm_ndomains" }, { .n_name = "" }, }; @@ -295,11 +297,12 @@ memstat_kvm_uma(struct memory_type_list *list, void *k { LIST_HEAD(, uma_keg) uma_kegs; struct memory_type *mtp; + struct uma_zone_domain uzd; struct uma_bucket *ubp, ub; struct uma_cache *ucp, *ucp_array; struct uma_zone *uzp, uz; struct uma_keg *kzp, kz; - int hint_dontsearch, i, mp_maxid, ret; + int hint_dontsearch, i, mp_maxid, ndomains, ret; char name[MEMTYPE_MAXNAME]; cpuset_t all_cpus; long cpusetsize; @@ -321,6 +324,12 @@ memstat_kvm_uma(struct memory_type_list *list, void *k list->mtl_error = ret; return (-1); } + ret = kread_symbol(kvm, X_VM_NDOMAINS, &ndomains, + sizeof(ndomains), 0); + if (ret != 0) { + list->mtl_error = ret; + return (-1); + } ret = kread_symbol(kvm, X_UMA_KEGS, &uma_kegs, sizeof(uma_kegs), 0); if (ret != 0) { list->mtl_error = ret; @@ -445,10 +454,17 @@ skip_percpu: kz.uk_ipers; mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; - for (ubp = LIST_FIRST(&uz.uz_buckets); ubp != - NULL; ubp = LIST_NEXT(&ub, ub_link)) { - ret = kread(kvm, ubp, &ub, sizeof(ub), 0); - mtp->mt_zonefree += ub.ub_cnt; + for (i = 0; i < ndomains; i++) { + ret = kread(kvm, &uz.uz_domain[i], &uzd, + sizeof(uzd), 0); + for (ubp = + LIST_FIRST(&uzd.uzd_buckets); + ubp != NULL; + ubp = LIST_NEXT(&ub, ub_link)) { + ret = kread(kvm, ubp, &ub, + sizeof(ub), 0); + mtp->mt_zonefree += ub.ub_cnt; + } } if (!((kz.uk_flags & UMA_ZONE_SECONDARY) && LIST_FIRST(&kz.uk_zones) != uzp)) { Modified: user/jeff/numa/sys/amd64/amd64/uma_machdep.c ============================================================================== --- user/jeff/numa/sys/amd64/amd64/uma_machdep.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/amd64/amd64/uma_machdep.c Tue Nov 21 21:18:04 2017 (r326077) @@ -42,14 +42,15 @@ __FBSDID("$FreeBSD$"); #include void * -uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, + int wait) { vm_page_t m; vm_paddr_t pa; void *va; *flags = UMA_SLAB_PRIV; - m = vm_page_alloc(NULL, 0, + m = vm_page_alloc_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) return (NULL); Modified: user/jeff/numa/sys/arm64/arm64/uma_machdep.c 
============================================================================== --- user/jeff/numa/sys/arm64/arm64/uma_machdep.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/arm64/arm64/uma_machdep.c Tue Nov 21 21:18:04 2017 (r326077) @@ -42,14 +42,15 @@ __FBSDID("$FreeBSD$"); #include void * -uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, + int wait) { vm_page_t m; vm_paddr_t pa; void *va; *flags = UMA_SLAB_PRIV; - m = vm_page_alloc(NULL, 0, + m = vm_page_alloc_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) return (NULL); Modified: user/jeff/numa/sys/i386/i386/pmap.c ============================================================================== --- user/jeff/numa/sys/i386/i386/pmap.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/i386/i386/pmap.c Tue Nov 21 21:18:04 2017 (r326077) @@ -342,8 +342,8 @@ static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offs static void pmap_pte_release(pt_entry_t *pte); static int pmap_unuse_pt(pmap_t, vm_offset_t, struct spglist *); #if defined(PAE) || defined(PAE_TABLES) -static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, - int wait); +static void *pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, + uint8_t *flags, int wait); #endif static void pmap_set_pg(void); @@ -703,12 +703,13 @@ pmap_page_init(vm_page_t m) #if defined(PAE) || defined(PAE_TABLES) static void * -pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) +pmap_pdpt_allocf(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, + int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ *flags = UMA_SLAB_KERNEL; - return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 0x0ULL, + return ((void *)kmem_alloc_contig_domain(domain, bytes, wait, 0x0ULL, 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); } #endif Modified: user/jeff/numa/sys/kern/kern_mbuf.c ============================================================================== --- user/jeff/numa/sys/kern/kern_mbuf.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/kern/kern_mbuf.c Tue Nov 21 21:18:04 2017 (r326077) @@ -281,7 +281,7 @@ static void mb_dtor_pack(void *, int, void *); static int mb_zinit_pack(void *, int, int); static void mb_zfini_pack(void *, int); static void mb_reclaim(uma_zone_t, int); -static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int); +static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); /* Ensure that MSIZE is a power of 2. */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); @@ -384,12 +384,13 @@ SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, * pages. */ static void * -mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) +mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, + int wait) { /* Inform UMA that this allocator uses kernel_map/object. 
*/ *flags = UMA_SLAB_KERNEL; - return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, + return ((void *)kmem_alloc_contig_domain(domain, bytes, wait, (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); } Modified: user/jeff/numa/sys/kern/subr_busdma_bufalloc.c ============================================================================== --- user/jeff/numa/sys/kern/subr_busdma_bufalloc.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/kern/subr_busdma_bufalloc.c Tue Nov 21 21:18:04 2017 (r326077) @@ -147,7 +147,7 @@ busdma_bufalloc_findzone(busdma_bufalloc_t ba, bus_siz } void * -busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size, +busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size, int domain, uint8_t *pflag, int wait) { #ifdef VM_MEMATTR_UNCACHEABLE @@ -155,7 +155,7 @@ busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_ /* Inform UMA that this allocator uses kernel_arena/object. */ *pflag = UMA_SLAB_KERNEL; - return ((void *)kmem_alloc_attr(kernel_arena, size, wait, 0, + return ((void *)kmem_alloc_attr_domain(domain, size, wait, 0, BUS_SPACE_MAXADDR, VM_MEMATTR_UNCACHEABLE)); #else Modified: user/jeff/numa/sys/kern/subr_vmem.c ============================================================================== --- user/jeff/numa/sys/kern/subr_vmem.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/kern/subr_vmem.c Tue Nov 21 21:18:04 2017 (r326077) @@ -497,7 +497,7 @@ bt_insfree(vmem_t *vm, bt_t *bt) * Import from the arena into the quantum cache in UMA. */ static int -qc_import(void *arg, void **store, int cnt, int flags) +qc_import(void *arg, void **store, int cnt, int domain, int flags) { qcache_t *qc; vmem_addr_t addr; @@ -611,7 +611,8 @@ static struct mtx_padalign __exclusive_cache_line vmem * we are really out of KVA. */ static void * -vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait) +vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, + int wait) { vmem_addr_t addr; @@ -625,7 +626,7 @@ vmem_bt_alloc(uma_zone_t zone, vm_size_t bytes, uint8_ if (vmem_xalloc(kernel_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT, &addr) == 0) { - if (kmem_back(kernel_object, addr, bytes, + if (kmem_back_domain(domain, kernel_object, addr, bytes, M_NOWAIT | M_USE_RESERVE) == 0) { mtx_unlock(&vmem_bt_lock); return ((void *)addr); Modified: user/jeff/numa/sys/kern/vfs_bio.c ============================================================================== --- user/jeff/numa/sys/kern/vfs_bio.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/kern/vfs_bio.c Tue Nov 21 21:18:04 2017 (r326077) @@ -131,7 +131,7 @@ static __inline void bd_wakeup(void); static int sysctl_runningspace(SYSCTL_HANDLER_ARGS); static void bufkva_reclaim(vmem_t *, int); static void bufkva_free(struct buf *); -static int buf_import(void *, void **, int, int); +static int buf_import(void *, void **, int, int, int); static void buf_release(void *, void **, int); static void maxbcachebuf_adjust(void); @@ -1417,7 +1417,7 @@ buf_free(struct buf *bp) * only as a per-cpu cache of bufs still maintained on a global list. 
*/ static int -buf_import(void *arg, void **store, int cnt, int flags) +buf_import(void *arg, void **store, int cnt, int domain, int flags) { struct buf *bp; int i; Modified: user/jeff/numa/sys/mips/mips/uma_machdep.c ============================================================================== --- user/jeff/numa/sys/mips/mips/uma_machdep.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/mips/mips/uma_machdep.c Tue Nov 21 21:18:04 2017 (r326077) @@ -42,7 +42,8 @@ __FBSDID("$FreeBSD$"); #include void * -uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, + int wait) { vm_paddr_t pa; vm_page_t m; @@ -57,7 +58,8 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_in #endif for (;;) { - m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, pflags); + m = vm_page_alloc_freelist_domain(domain, VM_FREELIST_DIRECT, + pflags); #ifndef __mips_n64 if (m == NULL && vm_page_reclaim_contig(pflags, 1, 0, MIPS_KSEG0_LARGEST_PHYS, PAGE_SIZE, 0)) Modified: user/jeff/numa/sys/powerpc/aim/mmu_oea64.c ============================================================================== --- user/jeff/numa/sys/powerpc/aim/mmu_oea64.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/powerpc/aim/mmu_oea64.c Tue Nov 21 21:18:04 2017 (r326077) @@ -1500,8 +1500,8 @@ retry: static mmu_t installed_mmu; static void * -moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, - int wait) +moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, + uint8_t *flags, int wait) { struct pvo_entry *pvo; vm_offset_t va; @@ -1518,7 +1518,7 @@ moea64_uma_page_alloc(uma_zone_t zone, vm_size_t bytes *flags = UMA_SLAB_PRIV; needed_lock = !PMAP_LOCKED(kernel_pmap); - m = vm_page_alloc(NULL, 0, + m = vm_page_alloc_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); if (m == NULL) return (NULL); Modified: user/jeff/numa/sys/powerpc/aim/slb.c ============================================================================== --- user/jeff/numa/sys/powerpc/aim/slb.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/powerpc/aim/slb.c Tue Nov 21 21:18:04 2017 (r326077) @@ -478,7 +478,8 @@ slb_insert_user(pmap_t pm, struct slb *slb) } static void * -slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) +slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, int domain, + u_int8_t *flags, int wait) { static vm_offset_t realmax = 0; void *va; @@ -488,7 +489,7 @@ slb_uma_real_alloc(uma_zone_t zone, vm_size_t bytes, u realmax = platform_real_maxaddr(); *flags = UMA_SLAB_PRIV; - m = vm_page_alloc_contig(NULL, 0, + m = vm_page_alloc_contig_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED, 1, 0, realmax, PAGE_SIZE, PAGE_SIZE, VM_MEMATTR_DEFAULT); if (m == NULL) Modified: user/jeff/numa/sys/powerpc/powerpc/uma_machdep.c ============================================================================== --- user/jeff/numa/sys/powerpc/powerpc/uma_machdep.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/powerpc/powerpc/uma_machdep.c Tue Nov 21 21:18:04 2017 (r326077) @@ -51,7 +51,8 @@ SYSCTL_INT(_hw, OID_AUTO, uma_mdpages, CTLFLAG_RD, &hw "UMA MD pages in use"); void * -uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, + int wait) { void *va; vm_paddr_t pa; @@ -59,7 +60,7 @@ uma_small_alloc(uma_zone_t zone, vm_size_t 
bytes, u_in *flags = UMA_SLAB_PRIV; - m = vm_page_alloc(NULL, 0, + m = vm_page_alloc_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); if (m == NULL) return (NULL); Modified: user/jeff/numa/sys/riscv/riscv/uma_machdep.c ============================================================================== --- user/jeff/numa/sys/riscv/riscv/uma_machdep.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/riscv/riscv/uma_machdep.c Tue Nov 21 21:18:04 2017 (r326077) @@ -41,7 +41,8 @@ __FBSDID("$FreeBSD$"); #include void * -uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, + int wait) { panic("uma_small_alloc"); Modified: user/jeff/numa/sys/sparc64/sparc64/vm_machdep.c ============================================================================== --- user/jeff/numa/sys/sparc64/sparc64/vm_machdep.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/sparc64/sparc64/vm_machdep.c Tue Nov 21 21:18:04 2017 (r326077) @@ -390,7 +390,8 @@ swi_vm(void *v) } void * -uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_int8_t *flags, int wait) +uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags, + int wait) { vm_paddr_t pa; vm_page_t m; @@ -400,7 +401,7 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, u_in *flags = UMA_SLAB_PRIV; - m = vm_page_alloc(NULL, 0, + m = vm_page_alloc_domain(NULL, 0, domain, malloc2vm_flags(wait) | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ); if (m == NULL) return (NULL); Modified: user/jeff/numa/sys/sys/busdma_bufalloc.h ============================================================================== --- user/jeff/numa/sys/sys/busdma_bufalloc.h Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/sys/busdma_bufalloc.h Tue Nov 21 21:18:04 2017 (r326077) @@ -111,7 +111,7 @@ struct busdma_bufzone * busdma_bufalloc_findzone(busdm * you can probably use these when you need uncacheable buffers. */ void * busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, vm_size_t size, - uint8_t *pflag, int wait); + int domain, uint8_t *pflag, int wait); void busdma_bufalloc_free_uncacheable(void *item, vm_size_t size, uint8_t pflag); Modified: user/jeff/numa/sys/vm/uma.h ============================================================================== --- user/jeff/numa/sys/vm/uma.h Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/vm/uma.h Tue Nov 21 21:18:04 2017 (r326077) @@ -45,6 +45,7 @@ /* Types and type defs */ struct uma_zone; +struct vm_domain_iterator; /* Opaque type used as a handle to the zone */ typedef struct uma_zone * uma_zone_t; @@ -126,7 +127,8 @@ typedef void (*uma_fini)(void *mem, int size); /* * Import new memory into a cache zone. */ -typedef int (*uma_import)(void *arg, void **store, int count, int flags); +typedef int (*uma_import)(void *arg, void **store, int count, int domain, + int flags); /* * Free memory from a cache zone. @@ -279,6 +281,10 @@ uma_zone_t uma_zcache_create(char *name, int size, uma * Allocates mp_maxid + 1 slabs sized to * sizeof(struct pcpu). */ +#define UMA_ZONE_NUMA 0x10000 /* + * NUMA aware Zone. Implements a best + * effort first-touch policy. + */ /* * These flags are shared between the keg and zone. In zones wishing to add @@ -371,25 +377,21 @@ uma_zfree(uma_zone_t zone, void *item) void uma_zwait(uma_zone_t zone); /* - * XXX The rest of the prototypes in this header are h0h0 magic for the VM. - * If you think you need to use it for a normal zone you're probably incorrect. 
- */ - -/* * Backend page supplier routines * * Arguments: * zone The zone that is requesting pages. * size The number of bytes being requested. * pflag Flags for these memory pages, see below. + * domain The NUMA domain that we prefer for this allocation. * wait Indicates our willingness to block. * * Returns: * A pointer to the allocated memory or NULL on failure. */ -typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, uint8_t *pflag, - int wait); +typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain, + uint8_t *pflag, int wait); /* * Backend page free routines @@ -403,8 +405,6 @@ typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t * None */ typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag); - - /* * Sets up the uma allocator. (Called by vm_mem_init) Modified: user/jeff/numa/sys/vm/uma_core.c ============================================================================== --- user/jeff/numa/sys/vm/uma_core.c Tue Nov 21 21:16:48 2017 (r326076) +++ user/jeff/numa/sys/vm/uma_core.c Tue Nov 21 21:18:04 2017 (r326077) @@ -76,10 +76,12 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include #include +#include #include #include #include @@ -94,17 +96,12 @@ __FBSDID("$FreeBSD$"); #endif /* - * This is the zone and keg from which all zones are spawned. The idea is that - * even the zone & keg heads are allocated from the allocator, so we use the - * bss section to bootstrap us. + * This is the zone and keg from which all zones are spawned. */ -static struct uma_keg masterkeg; -static struct uma_zone masterzone_k; -static struct uma_zone masterzone_z; -static uma_zone_t kegs = &masterzone_k; -static uma_zone_t zones = &masterzone_z; +static uma_zone_t kegs; +static uma_zone_t zones; -/* This is the zone from which all of uma_slab_t's are allocated. */ +/* This is the zone from which all offpage uma_slab_ts are allocated. */ static uma_zone_t slabzone; /* @@ -225,11 +222,11 @@ enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI } /* Prototypes.. 
*/ -static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int); -static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int); -static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int); +static void *noobj_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); +static void *page_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); +static void *startup_alloc(uma_zone_t, vm_size_t, int, uint8_t *, int); static void page_free(void *, vm_size_t, uint8_t); -static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int); +static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int, int); static void cache_drain(uma_zone_t); static void bucket_drain(uma_zone_t, uma_bucket_t); static void bucket_cache_drain(uma_zone_t zone); @@ -247,23 +244,23 @@ static int hash_expand(struct uma_hash *, struct uma_h static void hash_free(struct uma_hash *hash); static void uma_timeout(void *); static void uma_startup3(void); -static void *zone_alloc_item(uma_zone_t, void *, int); +static void *zone_alloc_item(uma_zone_t, void *, int, int); static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int); static void bucket_free(uma_zone_t zone, uma_bucket_t, void *); static void bucket_zone_drain(void); -static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags); -static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); -static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); +static uma_bucket_t zone_alloc_bucket(uma_zone_t, void *, int, int); +static uma_slab_t zone_fetch_slab(uma_zone_t, uma_keg_t, int, int); +static uma_slab_t zone_fetch_slab_multi(uma_zone_t, uma_keg_t, int, int); static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item); static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, uint32_t flags); -static int zone_import(uma_zone_t zone, void **bucket, int max, int flags); -static void zone_release(uma_zone_t zone, void **bucket, int cnt); -static void uma_zero_item(void *item, uma_zone_t zone); +static int zone_import(uma_zone_t, void **, int, int, int); +static void zone_release(uma_zone_t, void **, int); +static void uma_zero_item(void *, uma_zone_t); void uma_print_zone(uma_zone_t); void uma_print_stats(void); @@ -329,7 +326,7 @@ bucket_init(void) size += sizeof(void *) * ubz->ubz_entries; ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, - UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET); + UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET | UMA_ZONE_NUMA); } } @@ -566,7 +563,7 @@ hash_alloc(struct uma_hash *hash) M_UMAHASH, M_NOWAIT); } else { alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; - hash->uh_slab_hash = zone_alloc_item(hashzone, NULL, + hash->uh_slab_hash = zone_alloc_item(hashzone, NULL, 0, M_WAITOK); hash->uh_hashsize = UMA_HASH_SIZE_INIT; } @@ -733,6 +730,7 @@ cache_drain_safe_cpu(uma_zone_t zone) { uma_cache_t cache; uma_bucket_t b1, b2; + int domain; if (zone->uz_flags & UMA_ZFLAG_INTERNAL) return; @@ -740,10 +738,14 @@ cache_drain_safe_cpu(uma_zone_t zone) b1 = b2 = NULL; ZONE_LOCK(zone); critical_enter(); + if (zone->uz_flags & UMA_ZONE_NUMA) + domain = PCPU_GET(domain); + else + domain = 0; cache = &zone->uz_cpu[curcpu]; if (cache->uc_allocbucket) { if (cache->uc_allocbucket->ub_cnt != 0) - 
LIST_INSERT_HEAD(&zone->uz_buckets, + LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets, cache->uc_allocbucket, ub_link); else b1 = cache->uc_allocbucket; @@ -751,7 +753,7 @@ cache_drain_safe_cpu(uma_zone_t zone) } if (cache->uc_freebucket) { if (cache->uc_freebucket->ub_cnt != 0) - LIST_INSERT_HEAD(&zone->uz_buckets, + LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets, cache->uc_freebucket, ub_link); else b2 = cache->uc_freebucket; @@ -806,18 +808,22 @@ cache_drain_safe(uma_zone_t zone) static void bucket_cache_drain(uma_zone_t zone) { + uma_zone_domain_t zdom; uma_bucket_t bucket; + int i; /* - * Drain the bucket queues and free the buckets, we just keep two per - * cpu (alloc/free). + * Drain the bucket queues and free the buckets. */ - while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) { - LIST_REMOVE(bucket, ub_link); - ZONE_UNLOCK(zone); - bucket_drain(zone, bucket); - bucket_free(zone, bucket, NULL); - ZONE_LOCK(zone); + for (i = 0; i < vm_ndomains; i++) { + zdom = &zone->uz_domain[i]; + while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { + LIST_REMOVE(bucket, ub_link); + ZONE_UNLOCK(zone); + bucket_drain(zone, bucket); + bucket_free(zone, bucket, NULL); + ZONE_LOCK(zone); + } } /* @@ -862,7 +868,9 @@ static void keg_drain(uma_keg_t keg) { struct slabhead freeslabs = { 0 }; + uma_domain_t dom; uma_slab_t slab, tmp; + int i; /* * We don't want to take pages from statically allocated kegs at this @@ -877,20 +885,25 @@ keg_drain(uma_keg_t keg) if (keg->uk_free == 0) goto finished; - LIST_FOREACH_SAFE(slab, &keg->uk_free_slab, us_link, tmp) { - /* We have nowhere to free these to. */ - if (slab->us_flags & UMA_SLAB_BOOT) - continue; + for (i = 0; i < vm_ndomains; i++) { + dom = &keg->uk_domain[i]; + LIST_FOREACH_SAFE(slab, &dom->ud_free_slab, us_link, tmp) { + /* We have nowhere to free these to. */ + if (slab->us_flags & UMA_SLAB_BOOT) + continue; - LIST_REMOVE(slab, us_link); - keg->uk_pages -= keg->uk_ppera; - keg->uk_free -= keg->uk_ipers; + LIST_REMOVE(slab, us_link); + keg->uk_pages -= keg->uk_ppera; + keg->uk_free -= keg->uk_ipers; - if (keg->uk_flags & UMA_ZONE_HASH) - UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data); + if (keg->uk_flags & UMA_ZONE_HASH) + UMA_HASH_REMOVE(&keg->uk_hash, slab, + slab->us_data); - SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); + SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink); + } } + finished: KEG_UNLOCK(keg); @@ -950,7 +963,7 @@ zone_drain(uma_zone_t zone) * caller specified M_NOWAIT. */ static uma_slab_t -keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait) +keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait) { uma_alloc allocf; uma_slab_t slab; @@ -959,6 +972,8 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wai uint8_t flags; int i; + KASSERT(domain >= 0 && domain < vm_ndomains, + ("keg_alloc_slab: domain %d out of range", domain)); mtx_assert(&keg->uk_lock, MA_OWNED); slab = NULL; mem = NULL; @@ -968,7 +983,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wai size = keg->uk_ppera * PAGE_SIZE; if (keg->uk_flags & UMA_ZONE_OFFPAGE) { - slab = zone_alloc_item(keg->uk_slabzone, NULL, wait); + slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait); if (slab == NULL) goto out; } @@ -989,7 +1004,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wai wait |= M_NODUMP; /* zone is passed for legacy reasons. 
*/ - mem = allocf(zone, size, &flags, wait); + mem = allocf(zone, size, domain, &flags, wait); if (mem == NULL) { if (keg->uk_flags & UMA_ZONE_OFFPAGE) zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE); @@ -1010,6 +1025,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wai slab->us_data = mem; slab->us_freecount = keg->uk_ipers; slab->us_flags = flags; + slab->us_domain = domain; BIT_FILL(SLAB_SETSIZE, &slab->us_free); #ifdef INVARIANTS BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree); @@ -1049,7 +1065,8 @@ out: * the VM is ready. */ static void * -startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait) +startup_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, + int wait) { uma_keg_t keg; void *mem; @@ -1082,7 +1099,7 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_ #else keg->uk_allocf = page_alloc; #endif - return keg->uk_allocf(zone, bytes, pflag, wait); + return keg->uk_allocf(zone, bytes, domain, pflag, wait); } /* @@ -1097,12 +1114,13 @@ startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_ * NULL if M_NOWAIT is set. */ static void * -page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait) +page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, + int wait) { void *p; /* Returned page */ *pflag = UMA_SLAB_KERNEL; - p = (void *) kmem_malloc(kernel_arena, bytes, wait); + p = (void *) kmem_malloc_domain(domain, bytes, wait); return (p); } @@ -1119,7 +1137,8 @@ page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t * * NULL if M_NOWAIT is set. */ static void * -noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) +noobj_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, + int wait) { TAILQ_HEAD(, vm_page) alloctail; u_long npages; @@ -1132,7 +1151,7 @@ noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t npages = howmany(bytes, PAGE_SIZE); while (npages > 0) { - p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | + p = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ | ((wait & M_WAITOK) != 0 ? VM_ALLOC_WAITOK : VM_ALLOC_NOWAIT)); @@ -1404,6 +1423,7 @@ keg_ctor(void *mem, int size, void *udata, int flags) keg->uk_init = arg->uminit; keg->uk_fini = arg->fini; keg->uk_align = arg->align; + keg->uk_cursor = 0; keg->uk_free = 0; keg->uk_reserve = 0; keg->uk_pages = 0; @@ -1545,6 +1565,8 @@ zone_ctor(void *mem, int size, void *udata, int flags) zone->uz_count_min = 0; zone->uz_flags = 0; zone->uz_warning = NULL; + /* The domain structures follow the cpu structures. */ + zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus]; timevalclear(&zone->uz_ratecheck); keg = arg->keg; @@ -1750,22 +1772,43 @@ void uma_startup(void *mem, int npages) { struct uma_zctor_args args; + uma_keg_t masterkeg; + uintptr_t m; + int zsize; + int ksize; rw_init(&uma_rwlock, "UMA lock"); + ksize = sizeof(struct uma_keg) + + (sizeof(struct uma_domain) * vm_ndomains); + zsize = sizeof(struct uma_zone) + + (sizeof(struct uma_cache) * mp_ncpus) + + (sizeof(struct uma_zone_domain) * vm_ndomains); + + /* Use bootpages memory for the zone of zones and zone of kegs. 
@@ -1404,6 +1423,7 @@ keg_ctor(void *mem, int size, void *udata, int flags)
 	keg->uk_init = arg->uminit;
 	keg->uk_fini = arg->fini;
 	keg->uk_align = arg->align;
+	keg->uk_cursor = 0;
 	keg->uk_free = 0;
 	keg->uk_reserve = 0;
 	keg->uk_pages = 0;
@@ -1545,6 +1565,8 @@ zone_ctor(void *mem, int size, void *udata, int flags)
 	zone->uz_count_min = 0;
 	zone->uz_flags = 0;
 	zone->uz_warning = NULL;
+	/* The domain structures follow the cpu structures. */
+	zone->uz_domain = (struct uma_zone_domain *)&zone->uz_cpu[mp_ncpus];
 	timevalclear(&zone->uz_ratecheck);
 	keg = arg->keg;
@@ -1750,22 +1772,43 @@ void
 uma_startup(void *mem, int npages)
 {
 	struct uma_zctor_args args;
+	uma_keg_t masterkeg;
+	uintptr_t m;
+	int zsize;
+	int ksize;
 
 	rw_init(&uma_rwlock, "UMA lock");
 
+	ksize = sizeof(struct uma_keg) +
+	    (sizeof(struct uma_domain) * vm_ndomains);
+	zsize = sizeof(struct uma_zone) +
+	    (sizeof(struct uma_cache) * mp_ncpus) +
+	    (sizeof(struct uma_zone_domain) * vm_ndomains);
+
+	/* Use bootpages memory for the zone of zones and zone of kegs. */
+	m = (uintptr_t)mem;
+	zones = (uma_zone_t)m;
+	m += roundup(zsize, CACHE_LINE_SIZE);
+	kegs = (uma_zone_t)m;
+	m += roundup(zsize, CACHE_LINE_SIZE);
+	masterkeg = (uma_keg_t)m;
+	m += roundup(ksize, CACHE_LINE_SIZE);
+	m = roundup(m, PAGE_SIZE);
+	npages -= (m - (uintptr_t)mem) / PAGE_SIZE;
+	mem = (void *)m;
+
 	/* "manually" create the initial zone */
 	memset(&args, 0, sizeof(args));
 	args.name = "UMA Kegs";
-	args.size = sizeof(struct uma_keg);
+	args.size = ksize;
 	args.ctor = keg_ctor;
 	args.dtor = keg_dtor;
 	args.uminit = zero_init;
 	args.fini = NULL;
-	args.keg = &masterkeg;
+	args.keg = masterkeg;
 	args.align = 32 - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
-	/* The initial zone has no Per cpu queues so it's smaller */
-	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
+	zone_ctor(kegs, zsize, &args, M_WAITOK);
 
 	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
 	bootmem = mem;
@@ -1773,7 +1816,8 @@ uma_startup(void *mem, int npages)
 
 	args.name = "UMA Zones";
 	args.size = sizeof(struct uma_zone) +
-	    (sizeof(struct uma_cache) * (mp_maxid + 1));
+	    (sizeof(struct uma_cache) * (mp_maxid + 1)) +
+	    (sizeof(struct uma_zone_domain) * vm_ndomains);
 	args.ctor = zone_ctor;
 	args.dtor = zone_dtor;
 	args.uminit = zero_init;
@@ -1781,8 +1825,7 @@ uma_startup(void *mem, int npages)
 	args.keg = NULL;
 	args.align = 32 - 1;
 	args.flags = UMA_ZFLAG_INTERNAL;
-	/* The initial zone has no Per cpu queues so it's smaller */
-	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
+	zone_ctor(zones, zsize, &args, M_WAITOK);
 
 	/* Now make a zone for slab headers */
 	slabzone = uma_zcreate("UMA Slabs",
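zone_ctor() above finds the per-domain structures at &zone->uz_cpu[mp_ncpus] because a zone is now a single allocation with two variable-length arrays at its tail. A sketch of the implied size computation (the helper name is invented; this only restates the zsize math from uma_startup()). As an aside, the boot-time zsize uses mp_ncpus while the runtime "UMA Zones" size uses mp_maxid + 1; those agree only when CPU ids are dense, which appears to be assumed here.

	/*
	 * Layout of one zone allocation:
	 *   struct uma_zone                        fixed fields
	 *   struct uma_cache[ncpus]                per-CPU caches (uz_cpu[])
	 *   struct uma_zone_domain[ndomains]       per-domain bucket lists
	 */
	static size_t
	uma_zone_size(int ncpus, int ndomains)
	{
		return (sizeof(struct uma_zone) +
		    sizeof(struct uma_cache) * ncpus +
		    sizeof(struct uma_zone_domain) * ndomains);
	}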
@@ -1834,7 +1877,7 @@ uma_kcreate(uma_zone_t zone, size_t size, uma_init umi
 	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
 	args.flags = flags;
 	args.zone = zone;
-	return (zone_alloc_item(kegs, &args, M_WAITOK));
+	return (zone_alloc_item(kegs, &args, 0, M_WAITOK));
 }
 
 /* See uma.h */
@@ -1891,7 +1934,7 @@ uma_zcreate(const char *name, size_t size, uma_ctor ct
 		sx_slock(&uma_drain_lock);
 		locked = true;
 	}
-	res = zone_alloc_item(zones, &args, M_WAITOK);
+	res = zone_alloc_item(zones, &args, 0, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_drain_lock);
 	return (res);
@@ -1926,7 +1969,7 @@ uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor
 		locked = true;
 	}
 	/* XXX Attaches only one keg of potentially many. */
-	res = zone_alloc_item(zones, &args, M_WAITOK);
+	res = zone_alloc_item(zones, &args, 0, M_WAITOK);
 	if (locked)
 		sx_sunlock(&uma_drain_lock);
 	return (res);
@@ -1953,7 +1996,7 @@ uma_zcache_create(char *name, int size, uma_ctor ctor,
 	args.align = 0;
 	args.flags = flags;
 
-	return (zone_alloc_item(zones, &args, M_WAITOK));
+	return (zone_alloc_item(zones, &args, 0, M_WAITOK));
 }
 
 static void
@@ -2058,11 +2101,11 @@ uma_zwait(uma_zone_t zone)
 void *
 uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
 {
-	void *item;
-	uma_cache_t cache;
+	uma_zone_domain_t zdom;
 	uma_bucket_t bucket;
-	int lockfail;
-	int cpu;
+	uma_cache_t cache;
+	void *item;
+	int cpu, domain, lockfail;
 
 	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
 	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
@@ -2159,6 +2202,11 @@ zalloc_start:
 	if (bucket != NULL)
 		bucket_free(zone, bucket, udata);
 
+	if (zone->uz_flags & UMA_ZONE_NUMA)
+		domain = PCPU_GET(domain);
+	else
+		domain = 0;
+
 	/* Short-circuit for zones without buckets and low memory. */
 	if (zone->uz_count == 0 || bucketdisable)
 		goto zalloc_item;
@@ -2199,7 +2247,8 @@ zalloc_start:
 	/*
 	 * Check the zone's cache of buckets.
 	 */
-	if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
+	zdom = &zone->uz_domain[domain];
+	if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
 		KASSERT(bucket->ub_cnt != 0,
 		    ("uma_zalloc_arg: Returning an empty bucket."));
@@ -2224,7 +2273,7 @@ zalloc_start:
 	 * works we'll restart the allocation from the beginning and it
 	 * will use the just filled bucket.
 	 */
-	bucket = zone_alloc_bucket(zone, udata, flags);
+	bucket = zone_alloc_bucket(zone, udata, domain, flags);
 	CTR3(KTR_UMA, "uma_zalloc: zone %s(%p) bucket zone returned %p",
 	    zone->uz_name, zone, bucket);
 	if (bucket != NULL) {
@@ -2237,10 +2286,12 @@ zalloc_start:
 		 * initialized bucket to make this less likely or claim
 		 * the memory directly.
 		 */
-		if (cache->uc_allocbucket == NULL)
-			cache->uc_allocbucket = bucket;
+		if (cache->uc_allocbucket != NULL ||
+		    (zone->uz_flags & UMA_ZONE_NUMA &&
+		    domain != PCPU_GET(domain)))
+			LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
 		else
-			LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
+			cache->uc_allocbucket = bucket;
 		ZONE_UNLOCK(zone);
 		goto zalloc_start;
 	}
@@ -2249,38 +2300,79 @@ zalloc_start:
 	 * We may not be able to get a bucket so return an actual item.
 	 */
 zalloc_item:
-	item = zone_alloc_item(zone, udata, flags);
+	item = zone_alloc_item(zone, udata, domain, flags);
 
 	return (item);
 }
 
+/*
+ * Find a slab with some space.  Prefer slabs that are partially used over
+ * those that are totally full.  This helps to reduce fragmentation.
+ *
+ * If 'rr' is 1, search all domains starting from 'domain'.  Otherwise check
+ * only 'domain'.
+ */
 static uma_slab_t
-keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
+keg_first_slab(uma_keg_t keg, int domain, int rr)
 {
+	uma_domain_t dom;
 	uma_slab_t slab;
-	int reserve;
+	int start;
 
+	KASSERT(domain >= 0 && domain < vm_ndomains,
+	    ("keg_first_slab: domain %d out of range", domain));
+
+	slab = NULL;
+	start = domain;
+	do {
+		dom = &keg->uk_domain[domain];
+		if (!LIST_EMPTY(&dom->ud_part_slab))
+			return (LIST_FIRST(&dom->ud_part_slab));
+		if (!LIST_EMPTY(&dom->ud_free_slab)) {
+			slab = LIST_FIRST(&dom->ud_free_slab);
+			LIST_REMOVE(slab, us_link);
+			LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
+			return (slab);
+		}
+		if (rr)
+			domain = (domain + 1) % vm_ndomains;
+	} while (domain != start);
+
+	return (NULL);
+}
+
+static uma_slab_t
+keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
+{
+	uma_domain_t dom;
+	uma_slab_t slab;
+	int allocflags, domain, reserve, rr, start;
+
 	mtx_assert(&keg->uk_lock, MA_OWNED);
 	slab = NULL;
 	reserve = 0;
+	allocflags = flags;
 	if ((flags & M_USE_RESERVE) == 0)
 		reserve = keg->uk_reserve;
 
-	for (;;) {
-		/*
-		 * Find a slab with some space.  Prefer slabs that are partially
-		 * used over those that are totally full.  This helps to reduce
-		 * fragmentation.
-		 */
-		if (keg->uk_free > reserve) {
-			if (!LIST_EMPTY(&keg->uk_part_slab)) {
-				slab = LIST_FIRST(&keg->uk_part_slab);
-			} else {
-				slab = LIST_FIRST(&keg->uk_free_slab);
-				LIST_REMOVE(slab, us_link);
-				LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
-				    us_link);
-			}
+	/*
+	 * Round-robin for non first-touch zones when there is more than one
+	 * domain.
+	 */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
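The list software cut the diff off inside keg_fetch_slab(). Going by keg_first_slab() above, the keg->uk_cursor field initialized in keg_ctor(), and the comment about round-robin for non first-touch zones, the truncated remainder plausibly begins along these lines; this is an editor's reconstruction under stated assumptions, not the committed text (the rr test in particular is guessed):

	rr = vm_ndomains > 1 && (zone->uz_flags & UMA_ZONE_NUMA) == 0;
	if (rr) {
		/* Rotate the per-keg cursor so successive fills spread out. */
		keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
		domain = start = keg->uk_cursor;
	} else
		domain = start = rdomain;

	/*
	 * The rest presumably mirrors the old for (;;) loop: try
	 * keg_first_slab(keg, domain, rr), otherwise grow the keg with
	 * keg_alloc_slab(keg, zone, domain, allocflags) or sleep on the
	 * keg and retry, failing only for M_NOWAIT callers.
	 */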
From: Jeff Roberson <jeff@FreeBSD.org>
Date: Tue, 21 Nov 2017 22:06:49 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r326078 - user/jeff/numa/sys/vm

Author: jeff
Date: Tue Nov 21 22:06:49 2017
New Revision: 326078
URL: https://svnweb.freebsd.org/changeset/base/326078

Log:
  Fix for r325754: limit UMA reclaim calls to once per second.

Modified:
  user/jeff/numa/sys/vm/uma_core.c

Modified: user/jeff/numa/sys/vm/uma_core.c
==============================================================================
--- user/jeff/numa/sys/vm/uma_core.c	Tue Nov 21 21:18:04 2017	(r326077)
+++ user/jeff/numa/sys/vm/uma_core.c	Tue Nov 21 22:06:49 2017	(r326078)
@@ -3306,16 +3306,19 @@ void
 uma_reclaim_worker(void *arg __unused)
 {
 
-	sx_xlock(&uma_drain_lock);
 	for (;;) {
-		sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl", 0);
-		if (uma_reclaim_needed) {
-			sx_xunlock(&uma_drain_lock);
-			EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
-			sx_xlock(&uma_drain_lock);
-			uma_reclaim_locked(true);
-			atomic_set_int(&uma_reclaim_needed, 0);
-		}
+		sx_xlock(&uma_drain_lock);
+		while (uma_reclaim_needed == 0)
+			sx_sleep(uma_reclaim, &uma_drain_lock, PVM, "umarcl",
+			    hz);
+		sx_xunlock(&uma_drain_lock);
+		EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
+		sx_xlock(&uma_drain_lock);
+		uma_reclaim_locked(true);
+		atomic_set_int(&uma_reclaim_needed, 0);
+		sx_xunlock(&uma_drain_lock);
+		/* Don't fire more than once per-second. */
+		pause("umarclslp", hz);
 	}
 }
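Two things change in this loop: the flag is now re-tested under the lock in a while loop, so a wakeup posted between checks cannot be lost (with the hz timeout on sx_sleep as a backstop), and the unconditional pause() caps the worker at one pass per second. The same shape in a self-contained userspace analogue, with pthreads standing in for the sx lock and sleep channel (illustrative only):

	#include <pthread.h>
	#include <unistd.h>

	static pthread_mutex_t reclaim_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t reclaim_cv = PTHREAD_COND_INITIALIZER;
	static int reclaim_needed;

	static void
	do_reclaim(void)
	{
		/* Stand-in for EVENTHANDLER_INVOKE() + uma_reclaim_locked(). */
	}

	static void *
	reclaim_worker(void *arg)
	{
		(void)arg;
		for (;;) {
			pthread_mutex_lock(&reclaim_lock);
			/* Re-check under the lock so a wakeup is never lost. */
			while (reclaim_needed == 0)
				pthread_cond_wait(&reclaim_cv, &reclaim_lock);
			reclaim_needed = 0;
			pthread_mutex_unlock(&reclaim_lock);
			do_reclaim();
			sleep(1);	/* at most one pass per second */
		}
		return (NULL);
	}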
From: Jeff Roberson <jeff@FreeBSD.org>
Date: Wed, 22 Nov 2017 00:37:14 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r326081 - user/jeff/numa/sys/vm

Author: jeff
Date: Wed Nov 22 00:37:14 2017
New Revision: 326081
URL: https://svnweb.freebsd.org/changeset/base/326081

Log:
  Update for r325992: use the domain-specific functions in kmem_alloc_*.

Modified:
  user/jeff/numa/sys/vm/vm_kern.c
  user/jeff/numa/sys/vm/vm_page.c
  user/jeff/numa/sys/vm/vm_phys.c
  user/jeff/numa/sys/vm/vm_phys.h

Modified: user/jeff/numa/sys/vm/vm_kern.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_kern.c	Tue Nov 21 23:25:22 2017	(r326080)
+++ user/jeff/numa/sys/vm/vm_kern.c	Wed Nov 22 00:37:14 2017	(r326081)
@@ -185,13 +185,13 @@ kmem_alloc_attr_domain(int domain, vm_size_t size, int
 	for (i = 0; i < size; i += PAGE_SIZE) {
 		tries = 0;
 retry:
-		m = vm_page_alloc_contig(object, atop(offset + i),
-		    pflags, 1, low, high, PAGE_SIZE, 0, memattr);
+		m = vm_page_alloc_contig_domain(object, atop(offset + i),
+		    domain, pflags, 1, low, high, PAGE_SIZE, 0, memattr);
 		if (m == NULL) {
 			VM_OBJECT_WUNLOCK(object);
 			if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-				if (!vm_page_reclaim_contig(pflags, 1,
-				    low, high, PAGE_SIZE, 0) &&
+				if (!vm_page_reclaim_contig_domain(pflags, 1,
+				    domain, low, high, PAGE_SIZE, 0) &&
 				    (flags & M_WAITOK) != 0)
 					VM_WAIT;
 				VM_OBJECT_WLOCK(object);
@@ -202,6 +202,9 @@ retry:
 			vmem_free(vmem, addr, size);
 			return (0);
 		}
+		KASSERT(vm_phys_domidx(m) == domain,
+		    ("kmem_alloc_attr_domain: Domain mismatch %d != %d",
+		    vm_phys_domidx(m), domain));
 		if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
 			pmap_zero_page(m);
 		m->valid = VM_PAGE_BITS_ALL;
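A sketch of how a consumer might use the new domain-specific interface: wire down a domain-local buffer with a physical-address ceiling, retrying and reclaiming entirely within one domain as the hunk above implements. The wrapper function and its 4 GB bound are hypothetical; the kmem_alloc_attr_domain signature is the one declared in vm_extern.h by r325992.

	/* Hypothetical caller: domain-local memory below 4 GB for a device. */
	static vm_offset_t
	mydev_alloc_buf(int domain, vm_size_t size)
	{

		return (kmem_alloc_attr_domain(domain, size, M_WAITOK | M_ZERO,
		    0, (vm_paddr_t)0xffffffff, VM_MEMATTR_DEFAULT));
	}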
@@ -274,13 +277,14 @@ kmem_alloc_contig_domain(int domain, vm_size_t size, i
 	VM_OBJECT_WLOCK(object);
 	tries = 0;
 retry:
-	m = vm_page_alloc_contig(object, atop(offset), pflags,
+	m = vm_page_alloc_contig_domain(object, atop(offset), domain, pflags,
 	    npages, low, high, alignment, boundary, memattr);
 	if (m == NULL) {
 		VM_OBJECT_WUNLOCK(object);
 		if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
-			if (!vm_page_reclaim_contig(pflags, npages, low, high,
-			    alignment, boundary) && (flags & M_WAITOK) != 0)
+			if (!vm_page_reclaim_contig_domain(pflags, npages,
+			    domain, low, high, alignment, boundary) &&
+			    (flags & M_WAITOK) != 0)
 				VM_WAIT;
 			VM_OBJECT_WLOCK(object);
 			tries++;
@@ -289,6 +293,9 @@ retry:
 		vmem_free(vmem, addr, size);
 		return (0);
 	}
+	KASSERT(vm_phys_domidx(m) == domain,
+	    ("kmem_alloc_contig_domain: Domain mismatch %d != %d",
+	    vm_phys_domidx(m), domain));
 	end_m = m + npages;
 	tmp = addr;
 	for (; m < end_m; m++) {

Modified: user/jeff/numa/sys/vm/vm_page.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_page.c	Tue Nov 21 23:25:22 2017	(r326080)
+++ user/jeff/numa/sys/vm/vm_page.c	Wed Nov 22 00:37:14 2017	(r326081)
@@ -2540,8 +2540,8 @@ CTASSERT(powerof2(NRUNS));
  * must be a power of two.
  */
 bool
-vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary)
+vm_page_reclaim_contig_domain(int req, u_long npages, int domain,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
 	vm_paddr_t curr_low;
 	vm_page_t m_run, m_runs[NRUNS];
@@ -2581,8 +2581,8 @@ vm_page_reclaim_contig(int req, u_long npages, vm_padd
 		curr_low = low;
 		count = 0;
 		for (;;) {
-			m_run = vm_phys_scan_contig(npages, curr_low, high,
-			    alignment, boundary, options);
+			m_run = vm_phys_scan_contig(domain, npages, curr_low,
+			    high, alignment, boundary, options);
 			if (m_run == NULL)
 				break;
 			curr_low = VM_PAGE_TO_PHYS(m_run) + ptoa(npages);
@@ -2622,6 +2622,28 @@ vm_page_reclaim_contig(int req, u_long npages, vm_padd
 		return (reclaimed != 0);
 	}
 }
+
+bool
+vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, vm_paddr_t high,
+    u_long alignment, vm_paddr_t boundary)
+{
+	struct vm_domain_iterator vi;
+	int domain;
+	bool ret;
+
+	ret = false;
+	vm_policy_iterator_init(&vi);
+	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
+		ret = vm_page_reclaim_contig_domain(req, npages, domain, low,
+		    high, alignment, boundary);
+		if (ret)
+			break;
+	}
+	vm_policy_iterator_finish(&vi);
+
+	return (ret);
+}
+
 /*
  * vm_wait:	(also see VM_WAIT macro)
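The new vm_page_reclaim_contig() keeps the old signature by walking domains in policy order until one reclaim succeeds. The same iterate-until-success shape, lifted into a hypothetical generic helper to show the pattern (try_each_domain and its callback type are invented for this sketch; the iterator calls are the ones used above):

	static bool
	try_each_domain(bool (*op)(int domain, void *arg), void *arg)
	{
		struct vm_domain_iterator vi;
		int domain;
		bool ret;

		ret = false;
		vm_policy_iterator_init(&vi);
		/* Visit domains in the order the current policy dictates. */
		while (vm_domain_iterator_run(&vi, &domain) == 0) {
			ret = op(domain, arg);
			if (ret)
				break;
		}
		vm_policy_iterator_finish(&vi);
		return (ret);
	}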
Modified: user/jeff/numa/sys/vm/vm_phys.c
==============================================================================
--- user/jeff/numa/sys/vm/vm_phys.c	Tue Nov 21 23:25:22 2017	(r326080)
+++ user/jeff/numa/sys/vm/vm_phys.c	Wed Nov 22 00:37:14 2017	(r326081)
@@ -1010,7 +1010,7 @@ vm_phys_free_contig(vm_page_t m, u_long npages)
  * be a power of two.
  */
 vm_page_t
-vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
+vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary, int options)
 {
 	vm_paddr_t pa_end;
@@ -1025,6 +1025,8 @@ vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_
 		return (NULL);
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
+		if (seg->domain != domain)
+			continue;
 		if (seg->start >= high)
 			break;
 		if (low >= seg->end)

Modified: user/jeff/numa/sys/vm/vm_phys.h
==============================================================================
--- user/jeff/numa/sys/vm/vm_phys.h	Tue Nov 21 23:25:22 2017	(r326080)
+++ user/jeff/numa/sys/vm/vm_phys.h	Wed Nov 22 00:37:14 2017	(r326081)
@@ -85,8 +85,8 @@ void vm_phys_free_pages(vm_page_t m, int order);
 void vm_phys_init(void);
 void vm_phys_init_page(vm_paddr_t pa);
 vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
-vm_page_t vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary, int options);
+vm_page_t vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary, int options);
 void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
From: Peter Holm <pho@FreeBSD.org>
Date: Thu, 23 Nov 2017 12:54:18 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r326126 - user/pho/stress2/misc

Author: pho
Date: Thu Nov 23 12:54:18 2017
New Revision: 326126
URL: https://svnweb.freebsd.org/changeset/base/326126

Log:
  Add a regression test.

  PR:		223732
  Sponsored by:	Dell EMC Isilon

Added:
  user/pho/stress2/misc/mmap32.sh   (contents, props changed)

Added: user/pho/stress2/misc/mmap32.sh
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ user/pho/stress2/misc/mmap32.sh	Thu Nov 23 12:54:18 2017	(r326126)
@@ -0,0 +1,202 @@
+#!/bin/sh
+
+#
+# Copyright (c) 2017 Dell EMC Isilon
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+# Bug 223732 - mmap(2) causes unkillable denial of service with specific
+# flags
+# Test scenario inspired by: Arto Pekkanen
+
+# Fixed by r326098.
+
+. ../default.cfg
+
+dir=/tmp
+odir=`pwd`
+cd $dir
+sed '1,/^EOF/d' < $odir/$0 > $dir/mmap32.c
+mycc -o mmap32 -Wall -Wextra -O0 -g mmap32.c || exit 1
+rm -f mmap32.c
+
+$dir/mmap32
+s=$?
+[ -f mmap32.core -a $s -eq 0 ] &&
+    { ls -l mmap32.core; mv mmap32.core /tmp; s=1; }
+
+rm -rf $dir/mmap32
+exit $s
+
+EOF
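Everything after the EOF marker is the C program that the sed(1) line near the top of the script extracts and compiles. The kernel bug it hunts (PR 223732, fixed by r326098) was an unkillable hang triggered by mmap(2) with pathological arguments; a much smaller sanity check in the same spirit might look like this (illustrative only, not part of the commit):

	#include <sys/mman.h>

	#include <errno.h>
	#include <stdio.h>

	int
	main(void)
	{
		void *p;

		/*
		 * MAP_SHARED and MAP_PRIVATE together are invalid; the call
		 * must fail fast with EINVAL rather than wedge the process.
		 */
		p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_PRIVATE | MAP_ANON, -1, 0);
		if (p == MAP_FAILED && errno == EINVAL)
			printf("kernel rejected bogus mmap flags\n");
		return (p == MAP_FAILED ? 0 : 1);
	}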
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+
+#define N 4096
+static uint32_t r[N];
+
+static unsigned long
+makearg(void)
+{
+	unsigned int i;
+	unsigned long val;
+
+	val = arc4random();
+	i = arc4random() % 100;
+	if (i < 20)
+		val = val & 0xff;
+	if (i >= 20 && i < 40)
+		val = val & 0xffff;
+	if (i >= 40 && i < 60)
+		val = (unsigned long)(r) | (val & 0xffff);
+#if defined(__LP64__)
+	if (i >= 60) {
+		val = (val << 32) | arc4random();
+		if (i > 80)
+			val = val & 0x00007fffffffffffUL;
+	}
+#endif
+
+	return(val);
+}
+
+static void
+fuzz(int arg, void *addr, size_t len, int prot, int flags, int fd,
+    off_t offset)
+{
+	time_t start;
+	void *vp;
+	int n;
+
+	setproctitle("arg%d", arg);
+	n = 0;
+	start = time(NULL);
+	while (time(NULL) - start < 10) {
+		switch (arg) {
+		case 1:
+			addr = (void *)makearg();
+			break;
+		case 2:
+			len = makearg();
+			break;
+		case 3:
+			prot = makearg();
+			break;
+		case 4:
+			flags = makearg();
+			break;
+		case 5:
+			fd = makearg();
+			break;
+		case 6:
+			offset = makearg() & 0xffff;
+			break;
+		case 34:
+			prot = makearg();
+			flags = makearg();
+			break;
+		}
+		vp = mmap(addr, len, prot, flags, fd, offset);
+		if (vp != MAP_FAILED) {
+			munmap(vp, len);
+			n++;
+		}
+	}
+#if defined(DEBUG)
+	if (n == 0 && arg != 5)
+		fprintf(stderr, "%s(%d) failed\n", __func__, arg);
+#endif
+	exit(0);
+}
+
+int
+main(void)
+{
+	off_t offset;
+	pid_t pid;
+	size_t len;
+	struct rlimit rl;
+	time_t start;
+	void *addr, *vp;
+	int e, flags, fd, i, prot, status;
+
+	e = 0;
+	rl.rlim_max = rl.rlim_cur = 0;
+	if (setrlimit(RLIMIT_CORE, &rl) == -1)
+		warn("setrlimit");
+
+	addr = 0;
+	len = PAGE_SIZE;
+	prot = PROT_READ | PROT_WRITE;
+	flags = MAP_ANON | MAP_SHARED;
+	fd = -1;
+	offset = 0;
+	vp = mmap(addr, len, prot, flags, fd, offset);
+	if (vp == MAP_FAILED)
+		err(1, "initial mmap");
+	munmap(vp, len);
+
+	start = time(NULL);
+	while (time(NULL) - start < 120) {
+		for (i = 0; i < N; i++)
+			r[i] = arc4random();
+		for (i = 0; i < 6; i++) {
+			if ((pid = fork()) == 0)
+				fuzz(i + 1, addr, len, prot, flags, fd,
+				    offset);
+			if (waitpid(pid, &status, 0) != pid)
+				err(1, "waitpid %d", pid);
+			if (status != 0) {
+				if (WIFSIGNALED(status))
+					fprintf(stderr,
+					    "pid %d exit signal %d\n",
+					    pid, WTERMSIG(status));
+			}
+			e += status == 0 ? 0 : 1;
+		}
+		if ((pid = fork()) == 0)
+			fuzz(34, addr, len, prot, flags, fd, offset);
+		if (waitpid(pid, &status, 0) != pid)
+			err(1, "waitpid %d", pid);
+		if (status != 0) {
+			if (WIFSIGNALED(status))
+				fprintf(stderr,
+				    "pid %d exit signal %d\n",
+				    pid, WTERMSIG(status));
+		}
+		e += status == 0 ? 0 : 1;
+	}
+
+	return (e);
+}
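A note on the harness design: each forked child fuzzes a single mmap(2) argument (or prot and flags together, case 34) for ten seconds while the parent tallies abnormal exits over a two-minute run, so the program's exit status is the count of children that died on a signal. The parent zeroes RLIMIT_CORE up front, which means a leftover mmap32.core caught by the wrapper script indicates a child crashed hard enough to dump anyway and is counted as a failure. On kernels predating r326098 the expected symptom is presumably a hang or panic rather than a clean nonzero exit, which is why the test is time-bounded.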