From owner-svn-src-user@freebsd.org Wed Mar 14 00:30:50 2018 Return-Path: Delivered-To: svn-src-user@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 4B7EDF3064A for ; Wed, 14 Mar 2018 00:30:50 +0000 (UTC) (envelope-from jeff@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id F2E317EBDD; Wed, 14 Mar 2018 00:30:49 +0000 (UTC) (envelope-from jeff@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id EDAAA20E3D; Wed, 14 Mar 2018 00:30:49 +0000 (UTC) (envelope-from jeff@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w2E0Un1q050283; Wed, 14 Mar 2018 00:30:49 GMT (envelope-from jeff@FreeBSD.org) Received: (from jeff@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w2E0Un73050279; Wed, 14 Mar 2018 00:30:49 GMT (envelope-from jeff@FreeBSD.org) Message-Id: <201803140030.w2E0Un73050279@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: jeff set sender to jeff@FreeBSD.org using -f From: Jeff Roberson Date: Wed, 14 Mar 2018 00:30:49 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r330889 - user/jeff/numa/sys/vm X-SVN-Group: user X-SVN-Commit-Author: jeff X-SVN-Commit-Paths: user/jeff/numa/sys/vm X-SVN-Commit-Revision: 330889 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.25 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 14 Mar 2018 00:30:50 -0000 Author: jeff Date: Wed Mar 14 00:30:49 2018 New Revision: 330889 URL: https://svnweb.freebsd.org/changeset/base/330889 Log: Add a lock to the reservation structure to fix a LOR between the page and pmap locks. This lock adds 1MB of memory per 64GB of ram on amd64. Modified: user/jeff/numa/sys/vm/vm_kern.c user/jeff/numa/sys/vm/vm_page.c user/jeff/numa/sys/vm/vm_reserv.c user/jeff/numa/sys/vm/vm_reserv.h Modified: user/jeff/numa/sys/vm/vm_kern.c ============================================================================== --- user/jeff/numa/sys/vm/vm_kern.c Wed Mar 14 00:27:25 2018 (r330888) +++ user/jeff/numa/sys/vm/vm_kern.c Wed Mar 14 00:30:49 2018 (r330889) @@ -519,9 +519,7 @@ _kmem_unback(vm_object_t object, vm_offset_t addr, vm_ for (; offset < end; offset += PAGE_SIZE, m = next) { next = vm_page_next(m); vm_page_unwire(m, PQ_NONE); - vm_page_lock(m); vm_page_free(m); - vm_page_unlock(m); } VM_OBJECT_WUNLOCK(object); Modified: user/jeff/numa/sys/vm/vm_page.c ============================================================================== --- user/jeff/numa/sys/vm/vm_page.c Wed Mar 14 00:27:25 2018 (r330888) +++ user/jeff/numa/sys/vm/vm_page.c Wed Mar 14 00:30:49 2018 (r330889) @@ -800,12 +800,7 @@ vm_page_startup(vm_offset_t vaddr) vm_page_blacklist_check(list, NULL); freeenv(list); -#if VM_NRESERVLEVEL > 0 - /* - * Initialize the reservation management system. - */ - vm_reserv_init(); -#endif + /* * Set an initial domain policy for thread0 so that allocations * can work. Modified: user/jeff/numa/sys/vm/vm_reserv.c ============================================================================== --- user/jeff/numa/sys/vm/vm_reserv.c Wed Mar 14 00:27:25 2018 (r330888) +++ user/jeff/numa/sys/vm/vm_reserv.c Wed Mar 14 00:30:49 2018 (r330889) @@ -175,22 +175,24 @@ popmap_is_set(popmap_t popmap[], int i) * c - constant after boot */ struct vm_reserv { + struct mtx lock; /* reservation lock. */ TAILQ_ENTRY(vm_reserv) partpopq; /* (d) per-domain queue. */ LIST_ENTRY(vm_reserv) objq; /* (o, r) object queue */ vm_object_t object; /* (o, r) containing object */ vm_pindex_t pindex; /* (o, r) offset in object */ vm_page_t pages; /* (c) first page */ - int domain; /* (c) NUMA domain. */ - int popcnt; /* (r) # of pages in use */ + domainid_t domain; /* (c) NUMA domain. */ + uint16_t popcnt; /* (r) # of pages in use */ char inpartpopq; /* (d) */ popmap_t popmap[NPOPMAP]; /* (r) bit vector, used pages */ }; -#define vm_reserv_assert_locked(rv) vm_page_assert_locked((rv)->pages) -#define vm_reserv_lockptr(rv) vm_page_lockptr((rv)->pages) -#define vm_reserv_lock(rv) vm_page_lock((rv)->pages) -#define vm_reserv_trylock(rv) vm_page_trylock((rv)->pages) -#define vm_reserv_unlock(rv) vm_page_unlock((rv)->pages) +#define vm_reserv_lockptr(rv) (&(rv)->lock) +#define vm_reserv_assert_locked(rv) \ + mtx_assert(vm_reserv_lockptr(rv), MA_OWNED) +#define vm_reserv_lock(rv) mtx_lock(vm_reserv_lockptr(rv)) +#define vm_reserv_trylock(rv) mtx_trylock(vm_reserv_lockptr(rv)) +#define vm_reserv_unlock(rv) mtx_unlock(vm_reserv_lockptr(rv)) static struct mtx_padalign vm_reserv_domain_locks[MAXMEMDOM]; @@ -256,6 +258,8 @@ static counter_u64_t vm_reserv_reclaimed = EARLY_COUNT SYSCTL_COUNTER_U64(_vm_reserv, OID_AUTO, reclaimed, CTLFLAG_RD, &vm_reserv_reclaimed, "Cumulative number of reclaimed reservations"); +static __read_mostly int vm_reserv_enabled; + /* * The object lock pool is used to synchronize the rvq. We can not use a * pool mutex because it is required before malloc works. @@ -574,6 +578,8 @@ vm_reserv_extend_contig(int req, vm_object_t object, v VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); + if (vm_reserv_enabled == 0) + return (NULL); /* * Is a reservation fundamentally impossible? @@ -671,6 +677,8 @@ vm_reserv_alloc_contig(int req, vm_object_t object, vm VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(npages != 0, ("vm_reserv_alloc_contig: npages is 0")); + if (vm_reserv_enabled == 0) + return (NULL); /* * Is a reservation fundamentally impossible? @@ -843,6 +851,8 @@ vm_reserv_extend(int req, vm_object_t object, vm_pinde int index; VM_OBJECT_ASSERT_WLOCKED(object); + if (vm_reserv_enabled == 0) + return (NULL); /* * Could a reservation currently exist? @@ -903,6 +913,8 @@ vm_reserv_alloc_page(int req, vm_object_t object, vm_p int index; VM_OBJECT_ASSERT_WLOCKED(object); + if (vm_reserv_enabled == 0) + return (NULL); /* * Is a reservation fundamentally impossible? @@ -1102,47 +1114,21 @@ boolean_t vm_reserv_free_page(vm_page_t m) { vm_reserv_t rv; + boolean_t ret; rv = vm_reserv_from_page(m); if (rv->object == NULL) return (FALSE); - vm_reserv_depopulate(rv, m - rv->pages); - return (TRUE); -} + vm_reserv_lock(rv); + /* Re-validate after lock. */ + if (rv->object != NULL) { + vm_reserv_depopulate(rv, m - rv->pages); + ret = TRUE; + } else + ret = FALSE; + vm_reserv_unlock(rv); -/* - * Initializes the reservation management system. Specifically, initializes - * the reservation array. - * - * Requires that vm_page_array and first_page are initialized! - */ -void -vm_reserv_init(void) -{ - vm_paddr_t paddr; - struct vm_phys_seg *seg; - int i, segind; - - /* - * Initialize the reservation array. Specifically, initialize the - * "pages" field for every element that has an underlying superpage. - */ - for (segind = 0; segind < vm_phys_nsegs; segind++) { - seg = &vm_phys_segs[segind]; - paddr = roundup2(seg->start, VM_LEVEL_0_SIZE); - while (paddr + VM_LEVEL_0_SIZE <= seg->end) { - vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].pages = - PHYS_TO_VM_PAGE(paddr); - vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT].domain = - seg->domain; - paddr += VM_LEVEL_0_SIZE; - } - } - for (i = 0; i < MAXMEMDOM; i++) { - mtx_init(&vm_reserv_domain_locks[i], "VM reserv domain", NULL, - MTX_DEF); - TAILQ_INIT(&vm_rvq_partpop[i]); - } + return (ret); } /* @@ -1157,7 +1143,6 @@ vm_reserv_is_page_free(vm_page_t m) rv = vm_reserv_from_page(m); if (rv->object == NULL) return (false); - vm_reserv_assert_locked(rv); return (popmap_is_clear(rv->popmap, m - rv->pages)); } @@ -1225,6 +1210,8 @@ vm_reserv_reclaim_inactive(int domain) { vm_reserv_t rv; + if (vm_reserv_enabled == 0) + return (false); while ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) { vm_reserv_lock(rv); if (rv != TAILQ_FIRST(&vm_rvq_partpop[domain])) { @@ -1254,6 +1241,8 @@ vm_reserv_reclaim_contig(int domain, u_long npages, vm vm_reserv_t rv, rvn; int hi, i, lo, low_index, next_free; + if (vm_reserv_enabled == 0) + return (FALSE); if (npages > VM_LEVEL_0_NPAGES - 1) return (FALSE); size = npages << PAGE_SHIFT; @@ -1344,7 +1333,7 @@ again: } while (i < NPOPMAP); vm_reserv_unlock(rv); vm_reserv_domain_lock(domain); - if (!rvn->inpartpopq) + if (rvn != NULL && !rvn->inpartpopq) goto again; } vm_reserv_domain_unlock(domain); @@ -1411,7 +1400,6 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, { vm_paddr_t new_end; size_t size; - int i; /* * Calculate the size (in bytes) of the reservation array. Round up @@ -1431,26 +1419,58 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, VM_PROT_READ | VM_PROT_WRITE); bzero(vm_reserv_array, size); - for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++) - mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL, - MTX_DEF); - /* * Return the next available physical address. */ return (new_end); } +/* + * Initializes the reservation management system. Specifically, initializes + * the reservation array. + * + * Requires that vm_page_array and first_page are initialized! + */ static void -vm_reserv_counter_startup(void) +vm_reserv_init(void *unused) { + vm_paddr_t paddr; + struct vm_reserv *rv; + struct vm_phys_seg *seg; + int i, segind; + /* + * Initialize the reservation array. Specifically, initialize the + * "pages" field for every element that has an underlying superpage. + */ + for (segind = 0; segind < vm_phys_nsegs; segind++) { + seg = &vm_phys_segs[segind]; + paddr = roundup2(seg->start, VM_LEVEL_0_SIZE); + while (paddr + VM_LEVEL_0_SIZE <= seg->end) { + rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT]; + rv->pages = PHYS_TO_VM_PAGE(paddr); + rv->domain = seg->domain; + mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF); + paddr += VM_LEVEL_0_SIZE; + } + } + for (i = 0; i < MAXMEMDOM; i++) { + mtx_init(&vm_reserv_domain_locks[i], "VM reserv domain", NULL, + MTX_DEF); + TAILQ_INIT(&vm_rvq_partpop[i]); + } + + for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++) + mtx_init(&vm_reserv_object_mtx[i], "resv obj lock", NULL, + MTX_DEF); + vm_reserv_freed = counter_u64_alloc(M_WAITOK); vm_reserv_broken = counter_u64_alloc(M_WAITOK); vm_reserv_reclaimed = counter_u64_alloc(M_WAITOK); + + vm_reserv_enabled = 1; } -SYSINIT(vm_reserv_counters, SI_SUB_CPU, SI_ORDER_ANY, - vm_reserv_counter_startup, NULL); +SYSINIT(vm_reserv_init, SI_SUB_CPU, SI_ORDER_ANY, vm_reserv_init, NULL); /* * Returns the superpage containing the given page. Modified: user/jeff/numa/sys/vm/vm_reserv.h ============================================================================== --- user/jeff/numa/sys/vm/vm_reserv.h Wed Mar 14 00:27:25 2018 (r330888) +++ user/jeff/numa/sys/vm/vm_reserv.h Wed Mar 14 00:30:49 2018 (r330889) @@ -60,7 +60,6 @@ vm_page_t vm_reserv_extend(int req, vm_object_t object vm_pindex_t pindex, int domain, vm_page_t mpred); void vm_reserv_break_all(vm_object_t object); boolean_t vm_reserv_free_page(vm_page_t m); -void vm_reserv_init(void); bool vm_reserv_is_page_free(vm_page_t m); int vm_reserv_level(vm_page_t m); int vm_reserv_level_iffullpop(vm_page_t m);