Date:      Tue, 28 Nov 2017 23:18:35 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r326346 - head/sys/vm
Message-ID:  <201711282318.vASNIZYf036085@repo.freebsd.org>

Author: jeff
Date: Tue Nov 28 23:18:35 2017
New Revision: 326346
URL: https://svnweb.freebsd.org/changeset/base/326346

Log:
  Move the domain iterators into the page layer, where domain selection
  should take place.  This makes the majority of the phys layer explicitly
  domain-specific.
  
  Reviewed by:	markj, kib (some objections)
  Discussed with:	alc
  Tested by:	pho
  Sponsored by:	Netflix & Dell EMC Isilon
  Differential Revision:	https://reviews.freebsd.org/D13014
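  
  For illustration, the split yields two kinds of entry points in the page
  layer; the calls below are a minimal sketch (object, pindex, and m0 are
  assumed to be set up by the caller):
  
	/* Policy-driven: iterates domains per thread/process/system policy. */
	m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
  
	/* Domain-explicit: allocate from a specific domain, here the one
	 * already backing page m0 (vm_phys_domidx() is introduced below). */
	m = vm_page_alloc_domain(object, pindex, vm_phys_domidx(m0),
	    VM_ALLOC_NORMAL);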

Modified:
  head/sys/vm/vm_domain.c
  head/sys/vm/vm_domain.h
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_phys.c
  head/sys/vm/vm_phys.h
  head/sys/vm/vm_reserv.c
  head/sys/vm/vm_reserv.h

Modified: head/sys/vm/vm_domain.c
==============================================================================
--- head/sys/vm/vm_domain.c	Tue Nov 28 22:57:13 2017	(r326345)
+++ head/sys/vm/vm_domain.c	Tue Nov 28 23:18:35 2017	(r326346)
@@ -61,6 +61,118 @@ __FBSDID("$FreeBSD$");
 
 #include <vm/vm_domain.h>
 
+/*
+ * Default to first-touch + round-robin.
+ */
+static struct mtx vm_default_policy_mtx;
+MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
+    MTX_DEF);
+#ifdef VM_NUMA_ALLOC
+static struct vm_domain_policy vm_default_policy =
+    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
+#else
+/* Use round-robin so the domain policy code will only try once per allocation */
+static struct vm_domain_policy vm_default_policy =
+    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
+#endif
+
+static int
+sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
+{
+	char policy_name[32];
+	int error;
+
+	mtx_lock(&vm_default_policy_mtx);
+
+	/* Map policy to output string */
+	switch (vm_default_policy.p.policy) {
+	case VM_POLICY_FIRST_TOUCH:
+		strcpy(policy_name, "first-touch");
+		break;
+	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
+		strcpy(policy_name, "first-touch-rr");
+		break;
+	case VM_POLICY_ROUND_ROBIN:
+	default:
+		strcpy(policy_name, "rr");
+		break;
+	}
+	mtx_unlock(&vm_default_policy_mtx);
+
+	error = sysctl_handle_string(oidp, &policy_name[0],
+	    sizeof(policy_name), req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	mtx_lock(&vm_default_policy_mtx);
+	/* Set: match on the subset of policies that make sense as a default */
+	if (strcmp("first-touch-rr", policy_name) == 0) {
+		vm_domain_policy_set(&vm_default_policy,
+		    VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
+	} else if (strcmp("first-touch", policy_name) == 0) {
+		vm_domain_policy_set(&vm_default_policy,
+		    VM_POLICY_FIRST_TOUCH, 0);
+	} else if (strcmp("rr", policy_name) == 0) {
+		vm_domain_policy_set(&vm_default_policy,
+		    VM_POLICY_ROUND_ROBIN, 0);
+	} else {
+		error = EINVAL;
+		goto finish;
+	}
+
+	error = 0;
+finish:
+	mtx_unlock(&vm_default_policy_mtx);
+	return (error);
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
+    0, 0, sysctl_vm_default_policy, "A",
+    "Default policy (rr, first-touch, first-touch-rr");
+
+/*
+ * Initialise a VM domain iterator.
+ *
+ * Check the thread policy, then the proc policy,
+ * then default to the system policy.
+ */
+void
+vm_policy_iterator_init(struct vm_domain_iterator *vi)
+{
+#ifdef VM_NUMA_ALLOC
+	struct vm_domain_policy lcl;
+#endif
+
+	vm_domain_iterator_init(vi);
+
+#ifdef VM_NUMA_ALLOC
+	/* Copy out the thread policy */
+	vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
+	if (lcl.p.policy != VM_POLICY_NONE) {
+		/* Thread policy is present; use it */
+		vm_domain_iterator_set_policy(vi, &lcl);
+		return;
+	}
+
+	vm_domain_policy_localcopy(&lcl,
+	    &curthread->td_proc->p_vm_dom_policy);
+	if (lcl.p.policy != VM_POLICY_NONE) {
+		/* Process policy is present; use it */
+		vm_domain_iterator_set_policy(vi, &lcl);
+		return;
+	}
+#endif
+	/* Use system default policy */
+	vm_domain_iterator_set_policy(vi, &vm_default_policy);
+}
+
+void
+vm_policy_iterator_finish(struct vm_domain_iterator *vi)
+{
+
+	vm_domain_iterator_cleanup(vi);
+}
+
 #ifdef VM_NUMA_ALLOC
 static __inline int
 vm_domain_rr_selectdomain(int skip_domain)
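
The vm.default_policy sysctl moves here along with the iterator helpers;
it accepts exactly the three strings matched in the handler above.  An
illustrative invocation (standard sysctl(8) "old -> new" output assumed):

	# sysctl vm.default_policy=first-touch-rr
	vm.default_policy: first-touch-rr -> first-touch-rr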

Modified: head/sys/vm/vm_domain.h
==============================================================================
--- head/sys/vm/vm_domain.h	Tue Nov 28 22:57:13 2017	(r326345)
+++ head/sys/vm/vm_domain.h	Tue Nov 28 23:18:35 2017	(r326346)
@@ -63,4 +63,7 @@ extern	int vm_domain_iterator_run(struct vm_domain_ite
 extern	int vm_domain_iterator_isdone(struct vm_domain_iterator *vi);
 extern	int vm_domain_iterator_cleanup(struct vm_domain_iterator *vi);
 
+extern	void vm_policy_iterator_init(struct vm_domain_iterator *vi);
+extern	void vm_policy_iterator_finish(struct vm_domain_iterator *vi);
+
 #endif	/* __VM_DOMAIN_H__ */

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Tue Nov 28 22:57:13 2017	(r326345)
+++ head/sys/vm/vm_page.c	Tue Nov 28 23:18:35 2017	(r326346)
@@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_param.h>
+#include <vm/vm_domain.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
@@ -1603,6 +1604,16 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, 
 	    vm_radix_lookup_le(&object->rtree, pindex) : NULL));
 }
 
+vm_page_t
+vm_page_alloc_domain(vm_object_t object, vm_pindex_t pindex, int domain,
+    int req)
+{
+
+	return (vm_page_alloc_domain_after(object, pindex, domain, req,
+	    object != NULL ? vm_radix_lookup_le(&object->rtree, pindex) :
+	    NULL));
+}
+
 /*
  * Allocate a page in the specified object with the given page index.  To
 * optimize insertion of the page into the object, the caller must also specify
@@ -1610,10 +1621,35 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, 
  * page index, or NULL if no such page exists.
  */
 vm_page_t
-vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex, int req,
-    vm_page_t mpred)
+vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
+    int req, vm_page_t mpred)
 {
+	struct vm_domain_iterator vi;
 	vm_page_t m;
+	int domain, wait;
+
+	m = NULL;
+	vm_policy_iterator_init(&vi);
+	wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
+	req &= ~wait;
+	while (vm_domain_iterator_run(&vi, &domain) == 0) {
+		if (vm_domain_iterator_isdone(&vi))
+			req |= wait;
+		m = vm_page_alloc_domain_after(object, pindex, domain, req,
+		    mpred);
+		if (m != NULL)
+			break;
+	}
+	vm_policy_iterator_finish(&vi);
+
+	return (m);
+}
+
+vm_page_t
+vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
+    int req, vm_page_t mpred)
+{
+	vm_page_t m;
 	int flags, req_class;
 	u_int free_count;
 
@@ -1643,6 +1679,7 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pi
 	 * for the request class.
 	 */
 again:
+	m = NULL;
 	mtx_lock(&vm_page_queue_free_mtx);
 	if (vm_cnt.v_free_count > vm_cnt.v_free_reserved ||
 	    (req_class == VM_ALLOC_SYSTEM &&
@@ -1655,23 +1692,26 @@ again:
 #if VM_NRESERVLEVEL > 0
 		if (object == NULL || (object->flags & (OBJ_COLORED |
 		    OBJ_FICTITIOUS)) != OBJ_COLORED || (m =
-		    vm_reserv_alloc_page(object, pindex, mpred)) == NULL)
+		    vm_reserv_alloc_page(object, pindex, domain,
+		    mpred)) == NULL)
 #endif
 		{
 			/*
 			 * If not, allocate it from the free page queues.
 			 */
-			m = vm_phys_alloc_pages(object != NULL ?
+			m = vm_phys_alloc_pages(domain, object != NULL ?
 			    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
 #if VM_NRESERVLEVEL > 0
-			if (m == NULL && vm_reserv_reclaim_inactive()) {
-				m = vm_phys_alloc_pages(object != NULL ?
+			if (m == NULL && vm_reserv_reclaim_inactive(domain)) {
+				m = vm_phys_alloc_pages(domain,
+				    object != NULL ?
 				    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT,
 				    0);
 			}
 #endif
 		}
-	} else {
+	}
+	if (m == NULL) {
 		/*
 		 * Not allocatable, give up.
 		 */
@@ -1799,6 +1839,32 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t p
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr)
 {
+	struct vm_domain_iterator vi;
+	vm_page_t m;
+	int domain, wait;
+
+	m = NULL;
+	vm_policy_iterator_init(&vi);
+	wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
+	req &= ~wait;
+	while (vm_domain_iterator_run(&vi, &domain) == 0) {
+		if (vm_domain_iterator_isdone(&vi))
+			req |= wait;
+		m = vm_page_alloc_contig_domain(object, pindex, domain, req,
+		    npages, low, high, alignment, boundary, memattr);
+		if (m != NULL)
+			break;
+	}
+	vm_policy_iterator_finish(&vi);
+
+	return (m);
+}
+
+vm_page_t
+vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
+    int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
+    vm_paddr_t boundary, vm_memattr_t memattr)
+{
 	vm_page_t m, m_ret, mpred;
 	u_int busy_lock, flags, oflags;
 	int req_class;
@@ -1838,6 +1904,7 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t p
 	 * below the lower bound for the allocation class?
 	 */
 again:
+	m_ret = NULL;
 	mtx_lock(&vm_page_queue_free_mtx);
 	if (vm_cnt.v_free_count >= npages + vm_cnt.v_free_reserved ||
 	    (req_class == VM_ALLOC_SYSTEM &&
@@ -1850,31 +1917,27 @@ again:
 #if VM_NRESERVLEVEL > 0
 retry:
 		if (object == NULL || (object->flags & OBJ_COLORED) == 0 ||
-		    (m_ret = vm_reserv_alloc_contig(object, pindex, npages,
-		    low, high, alignment, boundary, mpred)) == NULL)
+		    (m_ret = vm_reserv_alloc_contig(object, pindex, domain,
+		    npages, low, high, alignment, boundary, mpred)) == NULL)
 #endif
 			/*
 			 * If not, allocate them from the free page queues.
 			 */
-			m_ret = vm_phys_alloc_contig(npages, low, high,
+			m_ret = vm_phys_alloc_contig(domain, npages, low, high,
 			    alignment, boundary);
-	} else {
-		if (vm_page_alloc_fail(object, req))
-			goto again;
-		return (NULL);
-	}
-	if (m_ret != NULL)
-		vm_phys_freecnt_adj(m_ret, -npages);
-	else {
 #if VM_NRESERVLEVEL > 0
-		if (vm_reserv_reclaim_contig(npages, low, high, alignment,
-		    boundary))
+		if (m_ret == NULL && vm_reserv_reclaim_contig(
+		    domain, npages, low, high, alignment, boundary))
 			goto retry;
 #endif
 	}
-	mtx_unlock(&vm_page_queue_free_mtx);
-	if (m_ret == NULL)
+	if (m_ret == NULL) {
+		if (vm_page_alloc_fail(object, req))
+			goto again;
 		return (NULL);
+	}
+	vm_phys_freecnt_adj(m_ret, -npages);
+	mtx_unlock(&vm_page_queue_free_mtx);
 	for (m = m_ret; m < &m_ret[npages]; m++)
 		vm_page_alloc_check(m);
 
@@ -1988,7 +2051,30 @@ vm_page_alloc_check(vm_page_t m)
 vm_page_t
 vm_page_alloc_freelist(int flind, int req)
 {
+	struct vm_domain_iterator vi;
 	vm_page_t m;
+	int domain, wait;
+
+	m = NULL;
+	vm_policy_iterator_init(&vi);
+	wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
+	req &= ~wait;
+	while (vm_domain_iterator_run(&vi, &domain) == 0) {
+		if (vm_domain_iterator_isdone(&vi))
+			req |= wait;
+		m = vm_page_alloc_freelist_domain(domain, flind, req);
+		if (m != NULL)
+			break;
+	}
+	vm_policy_iterator_finish(&vi);
+
+	return (m);
+}
+
+vm_page_t
+vm_page_alloc_freelist_domain(int domain, int flind, int req)
+{
+	vm_page_t m;
 	u_int flags, free_count;
 	int req_class;
 
@@ -2009,15 +2095,12 @@ again:
 	    (req_class == VM_ALLOC_SYSTEM &&
 	    vm_cnt.v_free_count > vm_cnt.v_interrupt_free_min) ||
 	    (req_class == VM_ALLOC_INTERRUPT &&
-	    vm_cnt.v_free_count > 0)) {
-		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, 0);
-	} else {
+	    vm_cnt.v_free_count > 0))
+		m = vm_phys_alloc_freelist_pages(domain, flind,
+		    VM_FREEPOOL_DIRECT, 0);
+	if (m == NULL) {
 		if (vm_page_alloc_fail(NULL, req))
 			goto again;
-		return (NULL);
-	}
-	if (m == NULL) {
-		mtx_unlock(&vm_page_queue_free_mtx);
 		return (NULL);
 	}
 	free_count = vm_phys_freecnt_adj(m, -1);
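
All three wrappers above share one subtlety: sleepable request flags are
withheld from every domain attempt except the last, so a page shortage
blocks (or fails with wait semantics) at most once per pass rather than
once per domain.  Reduced to its core, using the names from the diff:

	wait = req & (VM_ALLOC_WAITFAIL | VM_ALLOC_WAITOK);
	req &= ~wait;			/* earlier domains must not sleep */
	while (vm_domain_iterator_run(&vi, &domain) == 0) {
		if (vm_domain_iterator_isdone(&vi))
			req |= wait;	/* last domain: sleeping allowed */
		m = vm_page_alloc_domain_after(object, pindex, domain,
		    req, mpred);
		if (m != NULL)
			break;
	}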

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h	Tue Nov 28 22:57:13 2017	(r326345)
+++ head/sys/vm/vm_page.h	Tue Nov 28 23:18:35 2017	(r326346)
@@ -476,16 +476,24 @@ void vm_page_free_zero(vm_page_t m);
 void vm_page_activate (vm_page_t);
 void vm_page_advise(vm_page_t m, int advice);
 vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_alloc_domain(vm_object_t, vm_pindex_t, int, int);
 vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t);
+vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int,
+    vm_page_t);
 vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr);
+vm_page_t vm_page_alloc_contig_domain(vm_object_t object,
+    vm_pindex_t pindex, int domain, int req, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+    vm_memattr_t memattr);
 vm_page_t vm_page_alloc_freelist(int, int);
+vm_page_t vm_page_alloc_freelist_domain(int, int, int);
 void vm_page_change_lock(vm_page_t m, struct mtx **mtx);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count);
-void vm_page_deactivate (vm_page_t);
+void vm_page_deactivate(vm_page_t);
 void vm_page_deactivate_noreuse(vm_page_t);
 void vm_page_dequeue(vm_page_t m);
 void vm_page_dequeue_locked(vm_page_t m);
@@ -506,6 +514,8 @@ void vm_page_putfake(vm_page_t m);
 void vm_page_readahead_finish(vm_page_t m);
 bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
+bool vm_page_reclaim_contig_domain(int req, u_long npages, int domain,
+    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
 void vm_page_reference(vm_page_t m);
 void vm_page_remove (vm_page_t);
 int vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c	Tue Nov 28 22:57:13 2017	(r326345)
+++ head/sys/vm/vm_phys.c	Tue Nov 28 23:18:35 2017	(r326346)
@@ -151,23 +151,6 @@ SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRIN
 SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
     &vm_ndomains, 0, "Number of physical memory domains available.");
 
-/*
- * Default to first-touch + round-robin.
- */
-static struct mtx vm_default_policy_mtx;
-MTX_SYSINIT(vm_default_policy, &vm_default_policy_mtx, "default policy mutex",
-    MTX_DEF);
-#ifdef VM_NUMA_ALLOC
-static struct vm_domain_policy vm_default_policy =
-    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
-#else
-/* Use round-robin so the domain policy code will only try once per allocation */
-static struct vm_domain_policy vm_default_policy =
-    VM_DOMAIN_POLICY_STATIC_INITIALISER(VM_POLICY_ROUND_ROBIN, 0);
-#endif
-
-static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
-    int order);
 static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary);
@@ -176,60 +159,6 @@ static void vm_phys_create_seg(vm_paddr_t start, vm_pa
 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
     int order);
 
-static int
-sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
-{
-	char policy_name[32];
-	int error;
-
-	mtx_lock(&vm_default_policy_mtx);
-
-	/* Map policy to output string */
-	switch (vm_default_policy.p.policy) {
-	case VM_POLICY_FIRST_TOUCH:
-		strcpy(policy_name, "first-touch");
-		break;
-	case VM_POLICY_FIRST_TOUCH_ROUND_ROBIN:
-		strcpy(policy_name, "first-touch-rr");
-		break;
-	case VM_POLICY_ROUND_ROBIN:
-	default:
-		strcpy(policy_name, "rr");
-		break;
-	}
-	mtx_unlock(&vm_default_policy_mtx);
-
-	error = sysctl_handle_string(oidp, &policy_name[0],
-	    sizeof(policy_name), req);
-	if (error != 0 || req->newptr == NULL)
-		return (error);
-
-	mtx_lock(&vm_default_policy_mtx);
-	/* Set: match on the subset of policies that make sense as a default */
-	if (strcmp("first-touch-rr", policy_name) == 0) {
-		vm_domain_policy_set(&vm_default_policy,
-		    VM_POLICY_FIRST_TOUCH_ROUND_ROBIN, 0);
-	} else if (strcmp("first-touch", policy_name) == 0) {
-		vm_domain_policy_set(&vm_default_policy,
-		    VM_POLICY_FIRST_TOUCH, 0);
-	} else if (strcmp("rr", policy_name) == 0) {
-		vm_domain_policy_set(&vm_default_policy,
-		    VM_POLICY_ROUND_ROBIN, 0);
-	} else {
-		error = EINVAL;
-		goto finish;
-	}
-
-	error = 0;
-finish:
-	mtx_unlock(&vm_default_policy_mtx);
-	return (error);
-}
-
-SYSCTL_PROC(_vm, OID_AUTO, default_policy, CTLTYPE_STRING | CTLFLAG_RW,
-    0, 0, sysctl_vm_default_policy, "A",
-    "Default policy (rr, first-touch, first-touch-rr");
-
 /*
  * Red-black tree helpers for vm fictitious range management.
  */
@@ -271,71 +200,6 @@ vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *
 	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
 }
 
-#ifdef notyet
-static __inline int
-vm_rr_selectdomain(void)
-{
-#ifdef VM_NUMA_ALLOC
-	struct thread *td;
-
-	td = curthread;
-
-	td->td_dom_rr_idx++;
-	td->td_dom_rr_idx %= vm_ndomains;
-	return (td->td_dom_rr_idx);
-#else
-	return (0);
-#endif
-}
-#endif /* notyet */
-
-/*
- * Initialise a VM domain iterator.
- *
- * Check the thread policy, then the proc policy,
- * then default to the system policy.
- *
- * Later on the various layers will have this logic
- * plumbed into them and the phys code will be explicitly
- * handed a VM domain policy to use.
- */
-static void
-vm_policy_iterator_init(struct vm_domain_iterator *vi)
-{
-#ifdef VM_NUMA_ALLOC
-	struct vm_domain_policy lcl;
-#endif
-
-	vm_domain_iterator_init(vi);
-
-#ifdef VM_NUMA_ALLOC
-	/* Copy out the thread policy */
-	vm_domain_policy_localcopy(&lcl, &curthread->td_vm_dom_policy);
-	if (lcl.p.policy != VM_POLICY_NONE) {
-		/* Thread policy is present; use it */
-		vm_domain_iterator_set_policy(vi, &lcl);
-		return;
-	}
-
-	vm_domain_policy_localcopy(&lcl,
-	    &curthread->td_proc->p_vm_dom_policy);
-	if (lcl.p.policy != VM_POLICY_NONE) {
-		/* Process policy is present; use it */
-		vm_domain_iterator_set_policy(vi, &lcl);
-		return;
-	}
-#endif
-	/* Use system default policy */
-	vm_domain_iterator_set_policy(vi, &vm_default_policy);
-}
-
-static void
-vm_policy_iterator_finish(struct vm_domain_iterator *vi)
-{
-
-	vm_domain_iterator_cleanup(vi);
-}
-
 boolean_t
 vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
 {
@@ -504,7 +368,7 @@ _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, 
 
 	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
 	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
-	KASSERT(domain < vm_ndomains,
+	KASSERT(domain >= 0 && domain < vm_ndomains,
 	    ("vm_phys_create_seg: invalid domain provided"));
 	seg = &vm_phys_segs[vm_phys_nsegs++];
 	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
@@ -736,29 +600,16 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_f
  * The free page queues must be locked.
  */
 vm_page_t
-vm_phys_alloc_pages(int pool, int order)
+vm_phys_alloc_pages(int domain, int pool, int order)
 {
 	vm_page_t m;
-	int domain, flind;
-	struct vm_domain_iterator vi;
+	int flind;
 
-	KASSERT(pool < VM_NFREEPOOL,
-	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
-	KASSERT(order < VM_NFREEORDER,
-	    ("vm_phys_alloc_pages: order %d is out of range", order));
-
-	vm_policy_iterator_init(&vi);
-
-	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
-		for (flind = 0; flind < vm_nfreelists; flind++) {
-			m = vm_phys_alloc_domain_pages(domain, flind, pool,
-			    order);
-			if (m != NULL)
-				return (m);
-		}
+	for (flind = 0; flind < vm_nfreelists; flind++) {
+		m = vm_phys_alloc_freelist_pages(domain, flind, pool, order);
+		if (m != NULL)
+			return (m);
 	}
-
-	vm_policy_iterator_finish(&vi);
 	return (NULL);
 }
 
@@ -770,41 +621,23 @@ vm_phys_alloc_pages(int pool, int order)
  * The free page queues must be locked.
  */
 vm_page_t
-vm_phys_alloc_freelist_pages(int freelist, int pool, int order)
+vm_phys_alloc_freelist_pages(int domain, int flind, int pool, int order)
 {
+	struct vm_freelist *alt, *fl;
 	vm_page_t m;
-	struct vm_domain_iterator vi;
-	int domain;
+	int oind, pind;
 
-	KASSERT(freelist < VM_NFREELIST,
+	KASSERT(domain >= 0 && domain < vm_ndomains,
+	    ("vm_phys_alloc_freelist_pages: domain %d is out of range",
+	    domain));
+	KASSERT(flind < VM_NFREELIST,
 	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range",
-	    freelist));
+	    flind));
 	KASSERT(pool < VM_NFREEPOOL,
 	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
 	KASSERT(order < VM_NFREEORDER,
 	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
 
-	vm_policy_iterator_init(&vi);
-
-	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
-		m = vm_phys_alloc_domain_pages(domain,
-		    vm_freelist_to_flind[freelist], pool, order);
-		if (m != NULL)
-			return (m);
-	}
-
-	vm_policy_iterator_finish(&vi);
-	return (NULL);
-}
-
-static vm_page_t
-vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
-{	
-	struct vm_freelist *fl;
-	struct vm_freelist *alt;
-	int oind, pind;
-	vm_page_t m;
-
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	fl = &vm_phys_free_queues[domain][flind][pool][0];
 	for (oind = order; oind < VM_NFREEORDER; oind++) {
@@ -1261,14 +1094,13 @@ vm_phys_unfree_page(vm_page_t m)
  * "alignment" and "boundary" must be a power of two.
  */
 vm_page_t
-vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
+vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary)
 {
 	vm_paddr_t pa_end, pa_start;
 	vm_page_t m_run;
-	struct vm_domain_iterator vi;
 	struct vm_phys_seg *seg;
-	int domain, segind;
+	int segind;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
@@ -1276,12 +1108,6 @@ vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	if (low >= high)
 		return (NULL);
-	vm_policy_iterator_init(&vi);
-restartdom:
-	if (vm_domain_iterator_run(&vi, &domain) != 0) {
-		vm_policy_iterator_finish(&vi);
-		return (NULL);
-	}
 	m_run = NULL;
 	for (segind = vm_phys_nsegs - 1; segind >= 0; segind--) {
 		seg = &vm_phys_segs[segind];
@@ -1304,9 +1130,6 @@ restartdom:
 		if (m_run != NULL)
 			break;
 	}
-	if (m_run == NULL && !vm_domain_iterator_isdone(&vi))
-		goto restartdom;
-	vm_policy_iterator_finish(&vi);
 	return (m_run);
 }
 

Modified: head/sys/vm/vm_phys.h
==============================================================================
--- head/sys/vm/vm_phys.h	Tue Nov 28 22:57:13 2017	(r326345)
+++ head/sys/vm/vm_phys.h	Tue Nov 28 23:18:35 2017	(r326346)
@@ -72,10 +72,11 @@ extern int vm_phys_nsegs;
  * The following functions are only to be used by the virtual memory system.
  */
 void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end);
-vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary);
-vm_page_t vm_phys_alloc_freelist_pages(int freelist, int pool, int order);
-vm_page_t vm_phys_alloc_pages(int pool, int order);
+vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
+vm_page_t vm_phys_alloc_freelist_pages(int domain, int freelist, int pool,
+    int order);
+vm_page_t vm_phys_alloc_pages(int domain, int pool, int order);
 boolean_t vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high);
 int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
     vm_memattr_t memattr);
@@ -92,12 +93,13 @@ boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
 
 /*
- *	vm_phys_domain:
  *
- * 	Return the memory domain the page belongs to.
+ *	vm_phys_domidx:
+ *
+ *	Return the index of the domain the page belongs to.
  */
-static inline struct vm_domain *
-vm_phys_domain(vm_page_t m)
+static inline int
+vm_phys_domidx(vm_page_t m)
 {
 #ifdef VM_NUMA_ALLOC
 	int domn, segind;
@@ -107,10 +109,22 @@ vm_phys_domain(vm_page_t m)
 	KASSERT(segind < vm_phys_nsegs, ("segind %d m %p", segind, m));
 	domn = vm_phys_segs[segind].domain;
 	KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
-	return (&vm_dom[domn]);
+	return (domn);
 #else
-	return (&vm_dom[0]);
+	return (0);
 #endif
+}
+
+/*
+ *	vm_phys_domain:
+ *
+ * 	Return the memory domain the page belongs to.
+ */
+static inline struct vm_domain *
+vm_phys_domain(vm_page_t m)
+{
+
+	return (&vm_dom[vm_phys_domidx(m)]);
 }
 
 static inline u_int
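
With this change vm_phys_domain() becomes a thin wrapper over the new
vm_phys_domidx(), so callers can ask for either the domain index or the
per-domain structure.  A short sketch (illustrative; m is any managed
page):

	int domain;
	struct vm_domain *vmd;

	domain = vm_phys_domidx(m);	/* 0 when VM_NUMA_ALLOC is unset */
	vmd = vm_phys_domain(m);	/* equivalent to &vm_dom[domain] */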

Modified: head/sys/vm/vm_reserv.c
==============================================================================
--- head/sys/vm/vm_reserv.c	Tue Nov 28 22:57:13 2017	(r326345)
+++ head/sys/vm/vm_reserv.c	Tue Nov 28 23:18:35 2017	(r326346)
@@ -170,6 +170,7 @@ struct vm_reserv {
 	vm_object_t	object;			/* containing object */
 	vm_pindex_t	pindex;			/* offset within object */
 	vm_page_t	pages;			/* first page of a superpage */
+	int		domain;			/* NUMA domain */
 	int		popcnt;			/* # of pages in use */
 	char		inpartpopq;
 	popmap_t	popmap[NPOPMAP];	/* bit vector of used pages */
@@ -207,8 +208,7 @@ static vm_reserv_t vm_reserv_array;
  *
  * Access to this queue is synchronized by the free page queue lock.
  */
-static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop =
-			    TAILQ_HEAD_INITIALIZER(vm_rvq_partpop);
+static TAILQ_HEAD(, vm_reserv) vm_rvq_partpop[MAXMEMDOM];
 
 static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
 
@@ -277,24 +277,27 @@ sysctl_vm_reserv_partpopq(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
 	vm_reserv_t rv;
-	int counter, error, level, unused_pages;
+	int counter, error, domain, level, unused_pages;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
-	sbuf_printf(&sbuf, "\nLEVEL     SIZE  NUMBER\n\n");
-	for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
-		counter = 0;
-		unused_pages = 0;
-		mtx_lock(&vm_page_queue_free_mtx);
-		TAILQ_FOREACH(rv, &vm_rvq_partpop/*[level]*/, partpopq) {
-			counter++;
-			unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
+	sbuf_printf(&sbuf, "\nDOMAIN    LEVEL     SIZE  NUMBER\n\n");
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		for (level = -1; level <= VM_NRESERVLEVEL - 2; level++) {
+			counter = 0;
+			unused_pages = 0;
+			mtx_lock(&vm_page_queue_free_mtx);
+			TAILQ_FOREACH(rv, &vm_rvq_partpop[domain], partpopq) {
+				counter++;
+				unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
+			}
+			mtx_unlock(&vm_page_queue_free_mtx);
+			sbuf_printf(&sbuf, "%6d, %7d, %6dK, %6d\n",
+			    domain, level,
+			    unused_pages * ((int)PAGE_SIZE / 1024), counter);
 		}
-		mtx_unlock(&vm_page_queue_free_mtx);
-		sbuf_printf(&sbuf, "%5d: %6dK, %6d\n", level,
-		    unused_pages * ((int)PAGE_SIZE / 1024), counter);
 	}
 	error = sbuf_finish(&sbuf);
 	sbuf_delete(&sbuf);
@@ -321,8 +324,11 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
 	    index));
 	KASSERT(rv->popcnt > 0,
 	    ("vm_reserv_depopulate: reserv %p's popcnt is corrupted", rv));
+	KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
+	    ("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
+	    rv, rv->domain));
 	if (rv->inpartpopq) {
-		TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+		TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 		rv->inpartpopq = FALSE;
 	} else {
 		KASSERT(rv->pages->psind == 1,
@@ -335,11 +341,12 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
 	if (rv->popcnt == 0) {
 		LIST_REMOVE(rv, objq);
 		rv->object = NULL;
+		rv->domain = -1;
 		vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
 		vm_reserv_freed++;
 	} else {
 		rv->inpartpopq = TRUE;
-		TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
+		TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
 	}
 }
 
@@ -384,15 +391,18 @@ vm_reserv_populate(vm_reserv_t rv, int index)
 	    ("vm_reserv_populate: reserv %p is already full", rv));
 	KASSERT(rv->pages->psind == 0,
 	    ("vm_reserv_populate: reserv %p is already promoted", rv));
+	KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
+	    ("vm_reserv_populate: reserv %p's domain is corrupted %d",
+	    rv, rv->domain));
 	if (rv->inpartpopq) {
-		TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+		TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 		rv->inpartpopq = FALSE;
 	}
 	popmap_set(rv->popmap, index);
 	rv->popcnt++;
 	if (rv->popcnt < VM_LEVEL_0_NPAGES) {
 		rv->inpartpopq = TRUE;
-		TAILQ_INSERT_TAIL(&vm_rvq_partpop, rv, partpopq);
+		TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
 	} else
 		rv->pages->psind = 1;
 }
@@ -413,9 +423,9 @@ vm_reserv_populate(vm_reserv_t rv, int index)
  * The object and free page queue must be locked.
  */
 vm_page_t
-vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
-    vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
-    vm_page_t mpred)
+vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, int domain,
+    u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
+    vm_paddr_t boundary, vm_page_t mpred)
 {
 	vm_paddr_t pa, size;
 	vm_page_t m, m_ret, msucc;
@@ -535,7 +545,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
 	 * specified index may not be the first page within the first new
 	 * reservation.
 	 */
-	m = vm_phys_alloc_contig(allocpages, low, high, ulmax(alignment,
+	m = vm_phys_alloc_contig(domain, allocpages, low, high, ulmax(alignment,
 	    VM_LEVEL_0_SIZE), boundary > VM_LEVEL_0_SIZE ? boundary : 0);
 	if (m == NULL)
 		return (NULL);
@@ -558,6 +568,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t
 		LIST_INSERT_HEAD(&object->rvq, rv, objq);
 		rv->object = object;
 		rv->pindex = first;
+		rv->domain = vm_phys_domidx(m);
 		KASSERT(rv->popcnt == 0,
 		    ("vm_reserv_alloc_contig: reserv %p's popcnt is corrupted",
 		    rv));
@@ -613,7 +624,8 @@ found:
  * The object and free page queue must be locked.
  */
 vm_page_t
-vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, vm_page_t mpred)
+vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex, int domain,
+    vm_page_t mpred)
 {
 	vm_page_t m, msucc;
 	vm_pindex_t first, leftcap, rightcap;
@@ -692,7 +704,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
 	/*
 	 * Allocate and populate the new reservation.
 	 */
-	m = vm_phys_alloc_pages(VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
+	m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, VM_LEVEL_0_ORDER);
 	if (m == NULL)
 		return (NULL);
 	rv = vm_reserv_from_page(m);
@@ -703,6 +715,7 @@ vm_reserv_alloc_page(vm_object_t object, vm_pindex_t p
 	LIST_INSERT_HEAD(&object->rvq, rv, objq);
 	rv->object = object;
 	rv->pindex = first;
+	rv->domain = vm_phys_domidx(m);
 	KASSERT(rv->popcnt == 0,
 	    ("vm_reserv_alloc_page: reserv %p's popcnt is corrupted", rv));
 	KASSERT(!rv->inpartpopq,
@@ -749,6 +762,7 @@ vm_reserv_break(vm_reserv_t rv, vm_page_t m)
 	    ("vm_reserv_break: reserv %p's inpartpopq is TRUE", rv));
 	LIST_REMOVE(rv, objq);
 	rv->object = NULL;
+	rv->domain = -1;
 	if (m != NULL) {
 		/*
 		 * Since the reservation is being broken, there is no harm in
@@ -818,7 +832,7 @@ vm_reserv_break_all(vm_object_t object)
 		KASSERT(rv->object == object,
 		    ("vm_reserv_break_all: reserv %p is corrupted", rv));
 		if (rv->inpartpopq) {
-			TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+			TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 			rv->inpartpopq = FALSE;
 		}
 		vm_reserv_break(rv, NULL);
@@ -856,7 +870,7 @@ vm_reserv_init(void)
 {
 	vm_paddr_t paddr;
 	struct vm_phys_seg *seg;
-	int segind;
+	int i, segind;
 
 	/*
 	 * Initialize the reservation array.  Specifically, initialize the
@@ -871,6 +885,8 @@ vm_reserv_init(void)
 			paddr += VM_LEVEL_0_SIZE;
 		}
 	}
+	for (i = 0; i < MAXMEMDOM; i++)
+		TAILQ_INIT(&vm_rvq_partpop[i]);
 }
 
 /*
@@ -928,7 +944,10 @@ vm_reserv_reclaim(vm_reserv_t rv)
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 	KASSERT(rv->inpartpopq,
 	    ("vm_reserv_reclaim: reserv %p's inpartpopq is FALSE", rv));
-	TAILQ_REMOVE(&vm_rvq_partpop, rv, partpopq);
+	KASSERT(rv->domain >= 0 && rv->domain < vm_ndomains,
+	    ("vm_reserv_reclaim: reserv %p's domain is corrupted %d",
+	    rv, rv->domain));
+	TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
 	rv->inpartpopq = FALSE;
 	vm_reserv_break(rv, NULL);
 	vm_reserv_reclaimed++;
@@ -942,12 +961,12 @@ vm_reserv_reclaim(vm_reserv_t rv)
  * The free page queue lock must be held.
  */
 boolean_t
-vm_reserv_reclaim_inactive(void)
+vm_reserv_reclaim_inactive(int domain)
 {
 	vm_reserv_t rv;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-	if ((rv = TAILQ_FIRST(&vm_rvq_partpop)) != NULL) {
+	if ((rv = TAILQ_FIRST(&vm_rvq_partpop[domain])) != NULL) {
 		vm_reserv_reclaim(rv);
 		return (TRUE);
 	}
@@ -963,8 +982,8 @@ vm_reserv_reclaim_inactive(void)
  * The free page queue lock must be held.
  */
 boolean_t
-vm_reserv_reclaim_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
-    u_long alignment, vm_paddr_t boundary)
+vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
+    vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
 	vm_paddr_t pa, size;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
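
For the truncated vm_reserv.c portion above, the shape of the change is
visible in the hunks that did make it through: the single partially
populated queue becomes the per-domain array vm_rvq_partpop[], and each
reservation carries its domain for its lifetime.  A condensed sketch of
that lifecycle, using the fields from the diff (locking and the
surrounding list handling elided):

	rv->domain = vm_phys_domidx(m);	/* recorded at allocation */
	TAILQ_INSERT_TAIL(&vm_rvq_partpop[rv->domain], rv, partpopq);
	/* ... reservation fills, empties, or is reclaimed ... */
	TAILQ_REMOVE(&vm_rvq_partpop[rv->domain], rv, partpopq);
	rv->domain = -1;		/* cleared once the reserv is broken */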


