Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 18 Aug 2019 07:06:32 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r351181 - head/sys/vm
Message-ID:  <201908180706.x7I76W8U044822@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jeff
Date: Sun Aug 18 07:06:31 2019
New Revision: 351181
URL: https://svnweb.freebsd.org/changeset/base/351181

Log:
  Encapsulate phys_avail manipulation in a set of simple routines.  Add a
  NUMA aware boot time memory allocator that will be used to allocate early
  domain correct structures.  Code partially submitted by gallatin.
  
  Reviewed by:	gallatin, kib
  Tested by:	pho
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D21251

Modified:
  head/sys/vm/vm_page.c
  head/sys/vm/vm_phys.c
  head/sys/vm/vm_phys.h

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Sun Aug 18 04:19:41 2019	(r351180)
+++ head/sys/vm/vm_page.c	Sun Aug 18 07:06:31 2019	(r351181)
@@ -538,7 +538,7 @@ vm_page_startup(vm_offset_t vaddr)
 	char *list, *listend;
 	vm_offset_t mapped;
 	vm_paddr_t end, high_avail, low_avail, new_end, page_range, size;
-	vm_paddr_t biggestsize, last_pa, pa;
+	vm_paddr_t last_pa, pa;
 	u_long pagecount;
 	int biggestone, i, segind;
 #ifdef WITNESS
@@ -548,22 +548,10 @@ vm_page_startup(vm_offset_t vaddr)
 	long ii;
 #endif
 
-	biggestsize = 0;
-	biggestone = 0;
 	vaddr = round_page(vaddr);
 
-	for (i = 0; phys_avail[i + 1]; i += 2) {
-		phys_avail[i] = round_page(phys_avail[i]);
-		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
-	}
-	for (i = 0; phys_avail[i + 1]; i += 2) {
-		size = phys_avail[i + 1] - phys_avail[i];
-		if (size > biggestsize) {
-			biggestone = i;
-			biggestsize = size;
-		}
-	}
-
+	vm_phys_early_startup();
+	biggestone = vm_phys_avail_largest();
 	end = phys_avail[biggestone+1];
 
 	/*
@@ -776,7 +764,8 @@ vm_page_startup(vm_offset_t vaddr)
 	 * physical pages.
 	 */
 	for (i = 0; phys_avail[i + 1] != 0; i += 2)
-		vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]);
+		if (vm_phys_avail_size(i) != 0)
+			vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]);
 
 	/*
 	 * Initialize the physical memory allocator.

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c	Sun Aug 18 04:19:41 2019	(r351180)
+++ head/sys/vm/vm_phys.c	Sun Aug 18 07:06:31 2019	(r351181)
@@ -1101,8 +1101,8 @@ vm_phys_free_pages(vm_page_t m, int order)
 	vm_page_t m_buddy;
 
 	KASSERT(m->order == VM_NFREEORDER,
-	    ("vm_phys_free_pages: page %p has unexpected order %d",
-	    m, m->order));
+	    ("vm_phys_free_pages: page %p(%p) has unexpected order %d",
+	    m, (void *)m->phys_addr, m->order));
 	KASSERT(m->pool < VM_NFREEPOOL,
 	    ("vm_phys_free_pages: page %p has unexpected pool %d",
 	    m, m->pool));
@@ -1499,6 +1499,222 @@ done:
 		vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0);
 	}
 	return (m_ret);
+}
+
+/*
+ * Return the index of the first unused slot which may be the terminating
+ * entry.
+ */
+static int
+vm_phys_avail_count(void)
+{
+	int i;
+
+	for (i = 0; phys_avail[i + 1]; i += 2)
+		continue;
+	if (i > PHYS_AVAIL_ENTRIES)
+		panic("Improperly terminated phys_avail %d entries", i);
+
+	return (i);
+}
+
+/*
+ * Assert that a phys_avail entry is valid.
+ */
+static void
+vm_phys_avail_check(int i)
+{
+	if (phys_avail[i] & PAGE_MASK)
+		panic("Unaligned phys_avail[%d]: %#jx", i,
+		    (intmax_t)phys_avail[i]);
+	if (phys_avail[i+1] & PAGE_MASK)
+		panic("Unaligned phys_avail[%d + 1]: %#jx", i,
+		    (intmax_t)phys_avail[i]);
+	if (phys_avail[i + 1] < phys_avail[i])
+		panic("phys_avail[%d] start %#jx < end %#jx", i,
+		    (intmax_t)phys_avail[i], (intmax_t)phys_avail[i+1]);
+}
+
+/*
+ * Return the index of an overlapping phys_avail entry or -1.
+ */
+static int
+vm_phys_avail_find(vm_paddr_t pa)
+{
+	int i;
+
+	for (i = 0; phys_avail[i + 1]; i += 2)
+		if (phys_avail[i] <= pa && phys_avail[i + 1] > pa)
+			return (i);
+	return (-1);
+}
+
+/*
+ * Return the index of the largest entry.
+ */
+int
+vm_phys_avail_largest(void)
+{
+	vm_paddr_t sz, largesz;
+	int largest;
+	int i;
+
+	largest = 0;
+	largesz = 0;
+	for (i = 0; phys_avail[i + 1]; i += 2) {
+		sz = vm_phys_avail_size(i);
+		if (sz > largesz) {
+			largesz = sz;
+			largest = i;
+		}
+	}
+
+	return (largest);
+}
+
+vm_paddr_t
+vm_phys_avail_size(int i)
+{
+
+	return (phys_avail[i + 1] - phys_avail[i]);
+}
+
+/*
+ * Split an entry at the address 'pa'.  Return zero on success or errno.
+ */
+static int
+vm_phys_avail_split(vm_paddr_t pa, int i)
+{
+	int cnt;
+
+	vm_phys_avail_check(i);
+	if (pa <= phys_avail[i] || pa >= phys_avail[i + 1])
+		panic("vm_phys_avail_split: invalid address");
+	cnt = vm_phys_avail_count();
+	if (cnt >= PHYS_AVAIL_ENTRIES)
+		return (ENOSPC);
+	memmove(&phys_avail[i + 2], &phys_avail[i],
+	    (cnt - i) * sizeof(phys_avail[0]));
+	phys_avail[i + 1] = pa;
+	phys_avail[i + 2] = pa;
+	vm_phys_avail_check(i);
+	vm_phys_avail_check(i+2);
+
+	return (0);
+}
+
/*
 * This routine allocates NUMA node specific memory before the page
 * allocator is bootstrapped.  The memory is carved directly out of
 * phys_avail[] and its physical address is returned.
 */
vm_paddr_t
vm_phys_early_alloc(int domain, size_t alloc_size)
{
	int i, mem_index, biggestone;
	vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;


	/*
	 * Search the mem_affinity array for the biggest address
	 * range in the desired domain.  This is used to constrain
	 * the phys_avail selection below.
	 */
	biggestsize = 0;
	mem_index = 0;
	mem_start = 0;
	/* -1 acts as "no upper bound" -- assumes vm_paddr_t is unsigned. */
	mem_end = -1;
#ifdef NUMA
	if (mem_affinity != NULL) {
		for (i = 0; ; i++) {
			/* A zero-length entry terminates the array. */
			size = mem_affinity[i].end - mem_affinity[i].start;
			if (size == 0)
				break;
			if (mem_affinity[i].domain != domain)
				continue;
			if (size > biggestsize) {
				mem_index = i;
				biggestsize = size;
			}
		}
		mem_start = mem_affinity[mem_index].start;
		mem_end = mem_affinity[mem_index].end;
	}
#endif

	/*
	 * Now find the biggest physical segment within the desired
	 * numa domain.
	 */
	biggestsize = 0;
	biggestone = 0;
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		/*
		 * Skip regions that are out of range.
		 * NOTE(review): phys_avail[i+1] - alloc_size can wrap when
		 * a segment is smaller than alloc_size -- confirm callers
		 * never request more than every segment holds.
		 */
		if (phys_avail[i+1] - alloc_size < mem_start ||
		    phys_avail[i+1] > mem_end)
			continue;
		size = vm_phys_avail_size(i);
		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
	}
	/*
	 * NOTE(review): rounding happens after the search above used the
	 * unrounded alloc_size for its range check -- confirm intended.
	 */
	alloc_size = round_page(alloc_size);

	/*
	 * Grab single pages from the front to reduce fragmentation.
	 */
	if (alloc_size == PAGE_SIZE) {
		pa = phys_avail[biggestone];
		phys_avail[biggestone] += PAGE_SIZE;
		vm_phys_avail_check(biggestone);
		return (pa);
	}

	/*
	 * Naturally align large allocations: allocate from the tail of
	 * the segment, splitting off any misaligned remainder first.
	 */
	align = phys_avail[biggestone + 1] & (alloc_size - 1);
	if (alloc_size + align > biggestsize)
		panic("cannot find a large enough size\n");
	if (align != 0 &&
	    vm_phys_avail_split(phys_avail[biggestone + 1] - align,
	    biggestone) != 0)
		/* Wasting memory. */
		phys_avail[biggestone + 1] -= align;

	phys_avail[biggestone + 1] -= alloc_size;
	vm_phys_avail_check(biggestone);
	pa = phys_avail[biggestone + 1];
	return (pa);
}
+
+void
+vm_phys_early_startup(void)
+{
+	int i;
+
+	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
+		phys_avail[i] = round_page(phys_avail[i]);
+		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
+	}
+
+#ifdef NUMA
+	/* Force phys_avail to be split by domain. */
+	if (mem_affinity != NULL) {
+		int idx;
+
+		for (i = 0; mem_affinity[i].end != 0; i++) {
+			idx = vm_phys_avail_find(mem_affinity[i].start);
+			if (idx != -1 &&
+			    phys_avail[idx] != mem_affinity[i].start)
+				vm_phys_avail_split(mem_affinity[i].start, idx);
+			idx = vm_phys_avail_find(mem_affinity[i].end);
+			if (idx != -1 &&
+			    phys_avail[idx] != mem_affinity[i].end)
+				vm_phys_avail_split(mem_affinity[i].end, idx);
+		}
+	}
+#endif
 }
 
 #ifdef DDB

Modified: head/sys/vm/vm_phys.h
==============================================================================
--- head/sys/vm/vm_phys.h	Sun Aug 18 04:19:41 2019	(r351180)
+++ head/sys/vm/vm_phys.h	Sun Aug 18 07:06:31 2019	(r351181)
@@ -103,6 +103,11 @@ vm_page_t vm_phys_scan_contig(int domain, u_long npage
 void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
+vm_paddr_t vm_phys_early_alloc(int domain, size_t alloc_size);
+void vm_phys_early_startup(void);
+int vm_phys_avail_largest(void);
+vm_paddr_t vm_phys_avail_size(int i);
+
 
 /*
  *



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201908180706.x7I76W8U044822>