Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 1 May 2010 04:46:50 +0530
From:      "C. Jayachandran" <c.jayachandran@gmail.com>
To:        freebsd-mips@freebsd.org, Randall Stewart <rrs@lakerest.net>
Subject:   Re: Alternate scheme for page table page allocation
Message-ID:  <m2m98a59be81004301616nd3f1b6b0yc0db67ad93a870f7@mail.gmail.com>
In-Reply-To: <t2m98a59be81004300404x408e4b7fx8eea156061153b30@mail.gmail.com>

index | next in thread | previous in thread | raw e-mail

[-- Attachment #1 --]
Here's a version which will apply on HEAD (also at
http://sites.google.com/site/cjayachandran/files).

Regards,
JC.

On Fri, Apr 30, 2010 at 4:34 PM, C. Jayachandran
<c.jayachandran@gmail.com> wrote:
> I was looking at a hang I get during 'make -j32 buildworld' on XLR
> SMP, and as far as I can see, it is caused by the way page table pages
> are allocated when we have >512MB memory.
>
> The attached patch
> (http://sites.google.com/site/cjayachandran/files//pmap-page-alloc.patch)
> changes the way pages for the PTE/PDE entries are allocated in systems
> with >512M. This scheme uses vm_phys_alloc_contig to allocate page
> table pages in the KSEG0 region and has a UMA zone to cache them. This will
> allow us to avoid multi-TLB misses that happen when we access page
> table entries in the TLB miss handler. I think a similar approach can
> be taken to allocate 8K page-pairs for N32 (for >4GB RAM) and N64.
>
> With this patch, 'make -j32 buildworld' consistently works with 32
> cpus on an XLR booted SMP with 4GB RAM.  With the current page
> alloc code, I get a hang about an hour into buildworld.
>
> The patch is based off r206712 - I still have not found a fix for the
> crashes I see in versions after that. Please let me know your
> comments,  especially if you can think of a better way of doing this.
> I can make a version of this patch for HEAD if this is acceptable.
>
> JC.

[-- Attachment #2 --]
Index: sys/mips/mips/pmap.c
===================================================================
--- sys/mips/mips/pmap.c	(revision 207451)
+++ sys/mips/mips/pmap.c	(working copy)
@@ -80,6 +80,7 @@
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
+#include <vm/vm_phys.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <vm/vm_kern.h>
@@ -109,6 +110,10 @@
 #define	PMAP_SHPGPERPROC 200
 #endif
 
+#if defined(TARGET_XLR_XLS)
+#define HIGHMEM_SUPPORT
+#endif
+
 #if !defined(PMAP_DIAGNOSTIC)
 #define	PMAP_INLINE __inline
 #else
@@ -183,12 +188,18 @@
 static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
 static void pmap_TLB_invalidate_kernel(vm_offset_t);
 static void pmap_TLB_update_kernel(vm_offset_t, pt_entry_t);
+static vm_page_t pmap_alloc_pte_page(pmap_t, unsigned int, int, vm_offset_t *);
+static void pmap_release_pte_page(vm_page_t);
 
 #ifdef SMP
 static void pmap_invalidate_page_action(void *arg);
 static void pmap_invalidate_all_action(void *arg);
 static void pmap_update_page_action(void *arg);
+#endif
 
+#ifdef HIGHMEM_SUPPORT
+static void * pmap_ptpgzone_allocf(uma_zone_t, int, u_int8_t*, int);
+static uma_zone_t ptpgzone;
 #endif
 
 struct local_sysmaps {
@@ -530,6 +541,12 @@
 	pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
+
+#ifdef HIGHMEM_SUPPORT
+	ptpgzone = uma_zcreate("PT ENTRY", PAGE_SIZE, NULL,
+	    NULL, NULL, NULL, PAGE_SIZE-1, UMA_ZONE_NOFREE);
+	uma_zone_set_allocf(ptpgzone, pmap_ptpgzone_allocf);
+#endif
 }
 
 /***************************************************
@@ -887,7 +904,7 @@
 	/*
 	 * If the page is finally unwired, simply free it.
 	 */
-	vm_page_free_zero(m);
+	pmap_release_pte_page(m);
 	atomic_subtract_int(&cnt.v_wire_count, 1);
 	return (1);
 }
@@ -947,6 +964,118 @@
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
 
+#ifdef HIGHMEM_SUPPORT
+static void *	/* Page allocation callback for ptpgzone (installed with uma_zone_set_allocf() in the pmap_init hunk). */
+pmap_ptpgzone_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)	/* NOTE(review): zone, bytes and wait are never read in the body -- confirm ignoring wait is intended */
+{
+	vm_page_t m;
+	vm_paddr_t paddr;
+
+	*flags = UMA_SLAB_PRIV;	/* reported back through the flags out-parameter */
+	m = vm_phys_alloc_contig(1, 0, MIPS_KSEG0_LARGEST_PHYS,	/* one page; presumably args are (npages, low, high, alignment, boundary) -- verify against vm_phys.h */
+	     PAGE_SIZE, PAGE_SIZE);
+	if (m == NULL)
+		return (NULL);	/* nothing available: failure is reported as NULL */
+
+	paddr = VM_PAGE_TO_PHYS(m);
+	return ((void *)MIPS_PHYS_TO_KSEG0(paddr));	/* the result is the page's KSEG0 address, not the vm_page */
+}	
+
+static vm_page_t	/* HIGHMEM_SUPPORT variant: take one page from ptpgzone, zero it if needed, mark it wired; returns its vm_page and stores its address in *vap, or returns NULL. */
+pmap_alloc_pte_page(pmap_t pmap, unsigned int index, int wait, vm_offset_t *vap)
+{
+	vm_paddr_t paddr;
+	void *va;
+	vm_page_t m;
+	int locked;
+
+	locked = mtx_owned(&pmap->pm_mtx);	/* remember whether we were entered with the pmap mutex held */
+	if (locked) {
+		mtx_assert(&vm_page_queue_mtx, MA_OWNED);	/* a locked pmap is expected to come with the page queue mutex held too */
+		PMAP_UNLOCK(pmap);	/* both locks are dropped around the zone allocation */
+		vm_page_unlock_queues();
+	}
+	va = uma_zalloc(ptpgzone, wait);	/* wait is passed to UMA unchanged */
+	if (locked) {
+		vm_page_lock_queues();	/* retaken in reverse order of release: page queues first, then pmap */
+		PMAP_LOCK(pmap);
+	}
+	if (va == NULL)
+		return (NULL);	/* *vap is left unwritten on failure */
+
+	paddr = MIPS_KSEG0_TO_PHYS(va);
+	m = PHYS_TO_VM_PAGE(paddr);	/* KSEG0 address -> physical address -> vm_page */
+	
+	if ((m->flags & PG_ZERO) == 0)
+		bzero(va, PAGE_SIZE);	/* NOTE(review): PG_ZERO is never cleared on this path, unlike the #else variant -- confirm a page reused from the zone is still zero-filled */
+	m->pindex = index;
+	m->valid = VM_PAGE_BITS_ALL;
+	m->wire_count = 1;
+	atomic_add_int(&cnt.v_wire_count, 1);	/* global wired-page counter is bumped alongside the per-page wire_count */
+	*vap = (vm_offset_t)va;
+	return (m);
+}
+
+static void	/* HIGHMEM_SUPPORT variant: hand a page-table page back to ptpgzone. */
+pmap_release_pte_page(vm_page_t m)
+{
+	void *va;
+	vm_paddr_t paddr;
+
+	paddr = VM_PAGE_TO_PHYS(m);
+	va = (void *)MIPS_PHYS_TO_KSEG0(paddr);	/* rebuild the KSEG0 address form that pmap_ptpgzone_allocf() returns for zone pages */
+	uma_zfree(ptpgzone, va);	/* the wire accounting is not touched here; the visible callers adjust cnt.v_wire_count themselves */
+}
+#else
+static vm_page_t	/* Variant without HIGHMEM_SUPPORT: allocate a wired page with vm_page_alloc(), zero it if needed; returns its vm_page and stores its KSEG0 address in *vap, or returns NULL. */
+pmap_alloc_pte_page(pmap_t pmap, unsigned int index, int wait, vm_offset_t *vap)
+{
+	vm_offset_t va;
+	vm_page_t m;
+	int locked, req;
+
+	locked = mtx_owned(&pmap->pm_mtx);	/* only consulted on the failure path below */
+	req = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
+	if (wait & M_WAITOK)
+		req |= VM_ALLOC_NORMAL;	/* caller is willing to sleep */
+	else
+		req |= VM_ALLOC_INTERRUPT;	/* any other wait value selects the interrupt allocation class */
+
+	m = vm_page_alloc(NULL, index, req);
+	if (m == NULL) {
+		if (wait & M_WAITOK) {
+			if (locked) {
+				mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+				PMAP_UNLOCK(pmap);	/* both locks are dropped around VM_WAIT */
+				vm_page_unlock_queues();
+			}
+			VM_WAIT;
+			if (locked) {
+				vm_page_lock_queues();	/* retaken in reverse order of release */
+				PMAP_LOCK(pmap);
+			}
+		}
+		return NULL;	/* NULL even after VM_WAIT: no retry happens here, so the caller has to call again */
+	}
+	
+	va = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(m));	/* NOTE(review): no check that the page is below MIPS_KSEG0_LARGEST_PHYS -- presumably relies on all RAM being KSEG0-reachable; confirm */
+	if ((m->flags & PG_ZERO) == 0)
+		bzero((void *)va, PAGE_SIZE);
+	else
+		vm_page_flag_clear(m, PG_ZERO);	/* already zero: just drop the flag */
+	
+	m->valid = VM_PAGE_BITS_ALL;
+	*vap = (vm_offset_t)va;
+	return (m);
+}
+
+static void	/* Variant without HIGHMEM_SUPPORT: release a page-table page straight to the VM page allocator. */
+pmap_release_pte_page(vm_page_t m)
+{
+	vm_page_free(m);	/* NOTE(review): the call sites this replaces used vm_page_free_zero() -- confirm the change is intended */
+}
+#endif
+
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
@@ -955,37 +1084,16 @@
 pmap_pinit(pmap_t pmap)
 {
 	vm_offset_t ptdva;
-	vm_paddr_t ptdpa;
 	vm_page_t ptdpg;
 	int i;
-	int req;
 
 	PMAP_LOCK_INIT(pmap);
 
-	req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
-	    VM_ALLOC_ZERO;
-
 	/*
 	 * allocate the page directory page
 	 */
-	while ((ptdpg = vm_page_alloc(NULL, NUSERPGTBLS, req)) == NULL)
-		VM_WAIT;
-
-	ptdpg->valid = VM_PAGE_BITS_ALL;
-
-	ptdpa = VM_PAGE_TO_PHYS(ptdpg);
-	if (ptdpa < MIPS_KSEG0_LARGEST_PHYS) {
-		ptdva = MIPS_PHYS_TO_KSEG0(ptdpa);
-	} else {
-		ptdva = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
-		if (ptdva == 0)
-			panic("pmap_pinit: unable to allocate kva");
-		pmap_kenter(ptdva, ptdpa);
-	}
-
+	ptdpg = pmap_alloc_pte_page(pmap, NUSERPGTBLS, M_WAITOK, &ptdva);
 	pmap->pm_segtab = (pd_entry_t *)ptdva;
-	if ((ptdpg->flags & PG_ZERO) == 0)
-		bzero(pmap->pm_segtab, PAGE_SIZE);
 
 	pmap->pm_active = 0;
 	pmap->pm_ptphint = NULL;
@@ -1006,7 +1114,7 @@
 static vm_page_t
 _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
 {
-	vm_offset_t pteva, ptepa;
+	vm_offset_t pteva;
 	vm_page_t m;
 	int req;
 
@@ -1018,60 +1126,22 @@
 	/*
 	 * Find or fabricate a new pagetable page
 	 */
-	if ((m = vm_page_alloc(NULL, ptepindex, req)) == NULL) {
-		if (flags & M_WAITOK) {
-			PMAP_UNLOCK(pmap);
-			vm_page_unlock_queues();
-			VM_WAIT;
-			vm_page_lock_queues();
-			PMAP_LOCK(pmap);
-		}
-		/*
-		 * Indicate the need to retry.	While waiting, the page
-		 * table page may have been allocated.
-		 */
+	m = pmap_alloc_pte_page(pmap, ptepindex, flags, &pteva);
+	if (m == NULL)
 		return (NULL);
-	}
-	if ((m->flags & PG_ZERO) == 0)
-		pmap_zero_page(m);
 
-	KASSERT(m->queue == PQ_NONE,
-	    ("_pmap_allocpte: %p->queue != PQ_NONE", m));
-
 	/*
 	 * Map the pagetable page into the process address space, if it
 	 * isn't already there.
 	 */
 
 	pmap->pm_stats.resident_count++;
-
-	ptepa = VM_PAGE_TO_PHYS(m);
-	if (ptepa < MIPS_KSEG0_LARGEST_PHYS) {
-		pteva = MIPS_PHYS_TO_KSEG0(ptepa);
-	} else {
-		pteva = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
-		if (pteva == 0)
-			panic("_pmap_allocpte: unable to allocate kva");
-		pmap_kenter(pteva, ptepa);
-	}
-
 	pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva;
 
 	/*
 	 * Set the page table hint
 	 */
 	pmap->pm_ptphint = m;
-
-	/*
-	 * Kernel page tables are allocated in pmap_bootstrap() or
-	 * pmap_growkernel().
-	 */
-	if (is_kernel_pmap(pmap))
-		panic("_pmap_allocpte() called for kernel pmap\n");
-
-	m->valid = VM_PAGE_BITS_ALL;
-	vm_page_flag_clear(m, PG_ZERO);
-
 	return (m);
 }
 
@@ -1158,17 +1228,12 @@
 	ptdva = (vm_offset_t)pmap->pm_segtab;
 	ptdpg = PHYS_TO_VM_PAGE(vtophys(ptdva));
 
-	if (ptdva >= VM_MIN_KERNEL_ADDRESS) {
-		pmap_kremove(ptdva);
-		kmem_free(kernel_map, ptdva, PAGE_SIZE);
-	} else {
-		KASSERT(MIPS_IS_KSEG0_ADDR(ptdva),
-		    ("pmap_release: 0x%0lx is not in kseg0", (long)ptdva));
-	}
+	KASSERT(MIPS_IS_KSEG0_ADDR(ptdva),
+	    ("pmap_release: 0x%0lx is not in kseg0", (long)ptdva));
 
 	ptdpg->wire_count--;
 	atomic_subtract_int(&cnt.v_wire_count, 1);
-	vm_page_free_zero(ptdpg);
+	pmap_release_pte_page(ptdpg);
 	PMAP_LOCK_DESTROY(pmap);
 }
 
@@ -1178,10 +1243,10 @@
 void
 pmap_growkernel(vm_offset_t addr)
 {
-	vm_offset_t ptppaddr;
+	vm_offset_t pageva;
 	vm_page_t nkpg;
 	pt_entry_t *pte;
-	int i, req;
+	int i;
 
 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
 	if (kernel_vm_end == 0) {
@@ -1213,26 +1278,13 @@
 		/*
 		 * This index is bogus, but out of the way
 		 */
-		req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ;
-		nkpg = vm_page_alloc(NULL, nkpt, req);
+		nkpg = pmap_alloc_pte_page(kernel_pmap, nkpt, M_NOWAIT, &pageva);
+
 		if (!nkpg)
 			panic("pmap_growkernel: no memory to grow kernel");
 
 		nkpt++;
-
-		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
-		if (ptppaddr >= MIPS_KSEG0_LARGEST_PHYS) {
-			/*
-			 * We need to do something here, but I am not sure
-			 * what. We can access anything in the 0 - 512Meg
-			 * region, but if we get a page to go in the kernel
-			 * segmap that is outside of of that we really need
-			 * to have another mapping beyond the temporary ones
-			 * I have. Not sure how to do this yet. FIXME FIXME.
-			 */
-			panic("Gak, can't handle a k-page table outside of lower 512Meg");
-		}
-		pte = (pt_entry_t *)MIPS_PHYS_TO_KSEG0(ptppaddr);
+		pte = (pt_entry_t *)pageva;
 		segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte;
 
 		/*
@@ -1396,7 +1448,8 @@
 		}
 	}
 
-	KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
+	KASSERT(pv != NULL, ("pmap_remove_entry: pv not found, pa %lx va %lx",
+	     (u_long)VM_PAGE_TO_PHYS(m), (u_long)va));
 	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 	m->md.pv_list_count--;
 	if (TAILQ_FIRST(&m->md.pv_list) == NULL)
home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?m2m98a59be81004301616nd3f1b6b0yc0db67ad93a870f7>