Date: Sat, 1 May 2010 04:46:50 +0530 From: "C. Jayachandran" <c.jayachandran@gmail.com> To: freebsd-mips@freebsd.org, Randall Stewart <rrs@lakerest.net> Subject: Re: Alternate scheme for page table page allocation Message-ID: <m2m98a59be81004301616nd3f1b6b0yc0db67ad93a870f7@mail.gmail.com> In-Reply-To: <t2m98a59be81004300404x408e4b7fx8eea156061153b30@mail.gmail.com>
index | next in thread | previous in thread | raw e-mail
[-- Attachment #1 --] Here's a version which will apply on HEAD (also at http://sites.google.com/site/cjayachandran/files). Regards, JC. On Fri, Apr 30, 2010 at 4:34 PM, C. Jayachandran <c.jayachandran@gmail.com> wrote: > I was looking at a hang I get during 'make -j32 buildworld' on XLR > SMP, and as far as I can see, it is caused by the way page table pages > are allocated when we have >512MB memory. > > The attached patch > (http://sites.google.com/site/cjayachandran/files//pmap-page-alloc.patch) > changes the way pages for the PTE/PDE entries are allocated in systems > with >512M. This scheme uses vm_phys_alloc_contig to allocate page > table pages in the KSEG0 region and has a UMA zone to cache them. This will > allow us to avoid multi-TLB misses that happen when we access page > table entries in the TLB miss handler. I think a similar approach can > be taken to allocate 8K page-pairs for N32 (for >4GB RAM) and N64. > > With this patch, 'make -j32 buildworld' consistently works with 32 > cpus on an XLR booted SMP with 4GB RAM. With the current page > alloc code, I get a hang about an hour into buildworld. > > The patch is based off r206712 - I still have not found a fix for the > crashes I see in versions after that. Please let me know your > comments, especially if you can think of a better way of doing this. > I can make a version of this patch for HEAD if this is acceptable. > > JC. 
[-- Attachment #2 --] Index: sys/mips/mips/pmap.c =================================================================== --- sys/mips/mips/pmap.c (revision 207451) +++ sys/mips/mips/pmap.c (working copy) @@ -80,6 +80,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> +#include <vm/vm_phys.h> #include <sys/lock.h> #include <sys/mutex.h> #include <vm/vm_kern.h> @@ -109,6 +110,10 @@ #define PMAP_SHPGPERPROC 200 #endif +#if defined(TARGET_XLR_XLS) +#define HIGHMEM_SUPPORT +#endif + #if !defined(PMAP_DIAGNOSTIC) #define PMAP_INLINE __inline #else @@ -183,12 +188,18 @@ static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot); static void pmap_TLB_invalidate_kernel(vm_offset_t); static void pmap_TLB_update_kernel(vm_offset_t, pt_entry_t); +static vm_page_t pmap_alloc_pte_page(pmap_t, unsigned int, int, vm_offset_t *); +static void pmap_release_pte_page(vm_page_t); #ifdef SMP static void pmap_invalidate_page_action(void *arg); static void pmap_invalidate_all_action(void *arg); static void pmap_update_page_action(void *arg); +#endif +#ifdef HIGHMEM_SUPPORT +static void * pmap_ptpgzone_allocf(uma_zone_t, int, u_int8_t*, int); +static uma_zone_t ptpgzone; #endif struct local_sysmaps { @@ -530,6 +541,12 @@ pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count; pv_entry_high_water = 9 * (pv_entry_max / 10); uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); + +#ifdef HIGHMEM_SUPPORT + ptpgzone = uma_zcreate("PT ENTRY", PAGE_SIZE, NULL, + NULL, NULL, NULL, PAGE_SIZE-1, UMA_ZONE_NOFREE); + uma_zone_set_allocf(ptpgzone, pmap_ptpgzone_allocf); +#endif } /*************************************************** @@ -887,7 +904,7 @@ /* * If the page is finally unwired, simply free it. 
*/ - vm_page_free_zero(m); + pmap_release_pte_page(m); atomic_subtract_int(&cnt.v_wire_count, 1); return (1); } @@ -947,6 +964,118 @@ bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } +#ifdef HIGHMEM_SUPPORT +static void * +pmap_ptpgzone_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + vm_page_t m; + vm_paddr_t paddr; + + *flags = UMA_SLAB_PRIV; + m = vm_phys_alloc_contig(1, 0, MIPS_KSEG0_LARGEST_PHYS, + PAGE_SIZE, PAGE_SIZE); + if (m == NULL) + return (NULL); + + paddr = VM_PAGE_TO_PHYS(m); + return ((void *)MIPS_PHYS_TO_KSEG0(paddr)); +} + +static vm_page_t +pmap_alloc_pte_page(pmap_t pmap, unsigned int index, int wait, vm_offset_t *vap) +{ + vm_paddr_t paddr; + void *va; + vm_page_t m; + int locked; + + locked = mtx_owned(&pmap->pm_mtx); + if (locked) { + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + } + va = uma_zalloc(ptpgzone, wait); + if (locked) { + vm_page_lock_queues(); + PMAP_LOCK(pmap); + } + if (va == NULL) + return (NULL); + + paddr = MIPS_KSEG0_TO_PHYS(va); + m = PHYS_TO_VM_PAGE(paddr); + + if ((m->flags & PG_ZERO) == 0) + bzero(va, PAGE_SIZE); + m->pindex = index; + m->valid = VM_PAGE_BITS_ALL; + m->wire_count = 1; + atomic_add_int(&cnt.v_wire_count, 1); + *vap = (vm_offset_t)va; + return (m); +} + +static void +pmap_release_pte_page(vm_page_t m) +{ + void *va; + vm_paddr_t paddr; + + paddr = VM_PAGE_TO_PHYS(m); + va = (void *)MIPS_PHYS_TO_KSEG0(paddr); + uma_zfree(ptpgzone, va); +} +#else +static vm_page_t +pmap_alloc_pte_page(pmap_t pmap, unsigned int index, int wait, vm_offset_t *vap) +{ + vm_offset_t va; + vm_page_t m; + int locked, req; + + locked = mtx_owned(&pmap->pm_mtx); + req = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; + if (wait & M_WAITOK) + req |= VM_ALLOC_NORMAL; + else + req |= VM_ALLOC_INTERRUPT; + + m = vm_page_alloc(NULL, index, req); + if (m == NULL) { + if (wait & M_WAITOK) { + if (locked) { + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_UNLOCK(pmap); + 
vm_page_unlock_queues(); + } + VM_WAIT; + if (locked) { + vm_page_lock_queues(); + PMAP_LOCK(pmap); + } + } + return NULL; + } + + va = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(m)); + if ((m->flags & PG_ZERO) == 0) + bzero((void *)va, PAGE_SIZE); + else + vm_page_flag_clear(m, PG_ZERO); + + m->valid = VM_PAGE_BITS_ALL; + *vap = (vm_offset_t)va; + return (m); +} + +static void +pmap_release_pte_page(vm_page_t m) +{ + vm_page_free(m); +} +#endif + /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. @@ -955,37 +1084,16 @@ pmap_pinit(pmap_t pmap) { vm_offset_t ptdva; - vm_paddr_t ptdpa; vm_page_t ptdpg; int i; - int req; PMAP_LOCK_INIT(pmap); - req = VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | - VM_ALLOC_ZERO; - /* * allocate the page directory page */ - while ((ptdpg = vm_page_alloc(NULL, NUSERPGTBLS, req)) == NULL) - VM_WAIT; - - ptdpg->valid = VM_PAGE_BITS_ALL; - - ptdpa = VM_PAGE_TO_PHYS(ptdpg); - if (ptdpa < MIPS_KSEG0_LARGEST_PHYS) { - ptdva = MIPS_PHYS_TO_KSEG0(ptdpa); - } else { - ptdva = kmem_alloc_nofault(kernel_map, PAGE_SIZE); - if (ptdva == 0) - panic("pmap_pinit: unable to allocate kva"); - pmap_kenter(ptdva, ptdpa); - } - + ptdpg = pmap_alloc_pte_page(pmap, NUSERPGTBLS, M_WAITOK, &ptdva); pmap->pm_segtab = (pd_entry_t *)ptdva; - if ((ptdpg->flags & PG_ZERO) == 0) - bzero(pmap->pm_segtab, PAGE_SIZE); pmap->pm_active = 0; pmap->pm_ptphint = NULL; @@ -1006,7 +1114,7 @@ static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) { - vm_offset_t pteva, ptepa; + vm_offset_t pteva; vm_page_t m; int req; @@ -1018,60 +1126,22 @@ /* * Find or fabricate a new pagetable page */ - if ((m = vm_page_alloc(NULL, ptepindex, req)) == NULL) { - if (flags & M_WAITOK) { - PMAP_UNLOCK(pmap); - vm_page_unlock_queues(); - VM_WAIT; - vm_page_lock_queues(); - PMAP_LOCK(pmap); - } - /* - * Indicate the need to retry. While waiting, the page - * table page may have been allocated. 
- */ + m = pmap_alloc_pte_page(pmap, ptepindex, flags, &pteva); + if (m == NULL) return (NULL); - } - if ((m->flags & PG_ZERO) == 0) - pmap_zero_page(m); - KASSERT(m->queue == PQ_NONE, - ("_pmap_allocpte: %p->queue != PQ_NONE", m)); - /* * Map the pagetable page into the process address space, if it * isn't already there. */ pmap->pm_stats.resident_count++; - - ptepa = VM_PAGE_TO_PHYS(m); - if (ptepa < MIPS_KSEG0_LARGEST_PHYS) { - pteva = MIPS_PHYS_TO_KSEG0(ptepa); - } else { - pteva = kmem_alloc_nofault(kernel_map, PAGE_SIZE); - if (pteva == 0) - panic("_pmap_allocpte: unable to allocate kva"); - pmap_kenter(pteva, ptepa); - } - pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva; /* * Set the page table hint */ pmap->pm_ptphint = m; - - /* - * Kernel page tables are allocated in pmap_bootstrap() or - * pmap_growkernel(). - */ - if (is_kernel_pmap(pmap)) - panic("_pmap_allocpte() called for kernel pmap\n"); - - m->valid = VM_PAGE_BITS_ALL; - vm_page_flag_clear(m, PG_ZERO); - return (m); } @@ -1158,17 +1228,12 @@ ptdva = (vm_offset_t)pmap->pm_segtab; ptdpg = PHYS_TO_VM_PAGE(vtophys(ptdva)); - if (ptdva >= VM_MIN_KERNEL_ADDRESS) { - pmap_kremove(ptdva); - kmem_free(kernel_map, ptdva, PAGE_SIZE); - } else { - KASSERT(MIPS_IS_KSEG0_ADDR(ptdva), - ("pmap_release: 0x%0lx is not in kseg0", (long)ptdva)); - } + KASSERT(MIPS_IS_KSEG0_ADDR(ptdva), + ("pmap_release: 0x%0lx is not in kseg0", (long)ptdva)); ptdpg->wire_count--; atomic_subtract_int(&cnt.v_wire_count, 1); - vm_page_free_zero(ptdpg); + pmap_release_pte_page(ptdpg); PMAP_LOCK_DESTROY(pmap); } @@ -1178,10 +1243,10 @@ void pmap_growkernel(vm_offset_t addr) { - vm_offset_t ptppaddr; + vm_offset_t pageva; vm_page_t nkpg; pt_entry_t *pte; - int i, req; + int i; mtx_assert(&kernel_map->system_mtx, MA_OWNED); if (kernel_vm_end == 0) { @@ -1213,26 +1278,13 @@ /* * This index is bogus, but out of the way */ - req = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; - nkpg = vm_page_alloc(NULL, nkpt, req); + nkpg = 
pmap_alloc_pte_page(kernel_pmap, nkpt, M_NOWAIT, &pageva); + if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; - - ptppaddr = VM_PAGE_TO_PHYS(nkpg); - if (ptppaddr >= MIPS_KSEG0_LARGEST_PHYS) { - /* - * We need to do something here, but I am not sure - * what. We can access anything in the 0 - 512Meg - * region, but if we get a page to go in the kernel - * segmap that is outside of of that we really need - * to have another mapping beyond the temporary ones - * I have. Not sure how to do this yet. FIXME FIXME. - */ - panic("Gak, can't handle a k-page table outside of lower 512Meg"); - } - pte = (pt_entry_t *)MIPS_PHYS_TO_KSEG0(ptppaddr); + pte = (pt_entry_t *)pageva; segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte; /* @@ -1396,7 +1448,8 @@ } } - KASSERT(pv != NULL, ("pmap_remove_entry: pv not found")); + KASSERT(pv != NULL, ("pmap_remove_entry: pv not found, pa %lx va %lx", + (u_long)VM_PAGE_TO_PHYS(m), (u_long)va)); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); m->md.pv_list_count--; if (TAILQ_FIRST(&m->md.pv_list) == NULL)home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?m2m98a59be81004301616nd3f1b6b0yc0db67ad93a870f7>
