Date: Wed, 21 Jul 2010 09:27:00 +0000 (UTC)
From: "Jayachandran C." <jchandra@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r210327 - in head/sys: mips/include mips/mips vm
Message-ID: <201007210927.o6L9R07L074578@svn.freebsd.org>
Author: jchandra
Date: Wed Jul 21 09:27:00 2010
New Revision: 210327
URL: http://svn.freebsd.org/changeset/base/210327

Log:
  Redo the page table page allocation on MIPS, as suggested by alc@.
  The UMA zone based allocation is replaced by a scheme that creates
  a new free page list for the KSEG0 region, and a new function in
  sys/vm that allocates pages from a specific free page list.

  This also fixes a race condition introduced by the UMA based page table
  page allocation code: dropping the page queue and pmap locks before the
  call to uma_zfree() and re-acquiring them afterwards introduces a race
  condition (noted by alc@).

  The changes are:
  - Revert the earlier changes in MIPS pmap.c that added a UMA zone for
    page table pages.
  - Add a new freelist VM_FREELIST_HIGHMEM to MIPS vmparam.h for memory
    that is not directly mapped (in the 32-bit kernel).  Normal page
    allocations will first try the HIGHMEM freelist and then the default
    (direct mapped) freelist.
  - Add a new function 'vm_page_t vm_page_alloc_freelist(int flind, int
    order, int req)' to vm/vm_page.c to allocate a page from a specified
    freelist.  The MIPS page table pages will be allocated using this
    function from the freelist containing direct mapped pages.
  - Move the page initialization code from vm_phys_alloc_contig() to a
    new function vm_page_alloc_init(), and use this function to
    initialize pages in vm_page_alloc_freelist() too.
  - Split the function vm_phys_alloc_pages(int pool, int order) to create
    vm_phys_alloc_freelist_pages(int flind, int pool, int order), and use
    this function from both vm_page_alloc_freelist() and
    vm_phys_alloc_pages().

  Reviewed by:	alc

Modified:
  head/sys/mips/include/vmparam.h
  head/sys/mips/mips/pmap.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_page.h
  head/sys/vm/vm_phys.c
  head/sys/vm/vm_phys.h

Modified: head/sys/mips/include/vmparam.h
==============================================================================
--- head/sys/mips/include/vmparam.h	Wed Jul 21 09:20:40 2010	(r210326)
+++ head/sys/mips/include/vmparam.h	Wed Jul 21 09:27:00 2010	(r210327)
@@ -125,7 +125,6 @@
 #define	VM_NRESERVLEVEL		0
 #endif
 
-
 /* virtual sizes (bytes) for various kernel submaps */
 #ifndef VM_KMEM_SIZE
 #define	VM_KMEM_SIZE		(12 * 1024 * 1024)
@@ -174,13 +173,24 @@
 #define	VM_FREEPOOL_DIRECT	1
 
 /*
- * we support 1 free list:
+ * we support 2 free lists:
  *
- *	- DEFAULT for all systems
+ *	- DEFAULT for direct mapped (KSEG0) pages.
+ *	  Note: This usage of DEFAULT may be misleading because we use
+ *	  DEFAULT for allocating direct mapped pages.  The normal page
+ *	  allocations use HIGHMEM if available, and then DEFAULT.
+ *	- HIGHMEM for other pages
  */
-
+#ifdef __mips_n64
 #define	VM_NFREELIST		1
 #define	VM_FREELIST_DEFAULT	0
+#else
+#define	VM_NFREELIST		2
+#define	VM_FREELIST_DEFAULT	1
+#define	VM_FREELIST_HIGHMEM	0
+#define	VM_FREELIST_DIRECT	VM_FREELIST_DEFAULT
+#define	VM_HIGHMEM_ADDRESS	((vm_paddr_t)0x20000000)
+#endif
 
 /*
  * The largest allocation size is 1MB.
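A note on the vmparam.h constants above, since the rest of the patch leans on
them: on 32-bit MIPS, KSEG0 is a fixed, cached, unmapped window at virtual
address 0x80000000 that mirrors the low 512MB of physical memory, which is why
VM_HIGHMEM_ADDRESS sits at 0x20000000.  Page table pages have to stay below
that boundary so the pmap can reach them through pure address arithmetic,
without TLB entries.  The stand-alone sketch below illustrates the direct-map
translation; phys_to_kseg0()/kseg0_to_phys() are hypothetical simplifications
of the kernel's MIPS_PHYS_TO_KSEG0()/MIPS_KSEG0_TO_PHYS() macros used in
pmap.c, not their actual definitions.

#include <assert.h>
#include <stdint.h>

#define VM_HIGHMEM_ADDRESS	0x20000000UL	/* from the vmparam.h diff above */
#define MIPS_KSEG0_START	0x80000000UL	/* base of the KSEG0 window */

/* Direct-map translation: no TLB entry and no page table walk needed. */
static inline uintptr_t
phys_to_kseg0(uintptr_t pa)
{
	assert(pa < VM_HIGHMEM_ADDRESS);	/* only the low 512MB is visible */
	return (pa | MIPS_KSEG0_START);
}

static inline uintptr_t
kseg0_to_phys(uintptr_t va)
{
	return (va & ~MIPS_KSEG0_START);
}

Because the translation is constant-time arithmetic, a page table page taken
from the direct mapped freelist is always addressable, even deep inside the
pmap where taking a TLB fault would be unsafe.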
Modified: head/sys/mips/mips/pmap.c
==============================================================================
--- head/sys/mips/mips/pmap.c	Wed Jul 21 09:20:40 2010	(r210326)
+++ head/sys/mips/mips/pmap.c	Wed Jul 21 09:27:00 2010	(r210327)
@@ -187,8 +187,8 @@ static vm_page_t pmap_allocpte(pmap_t pm
 static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
 static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot);
-static vm_page_t pmap_alloc_pte_page(pmap_t, unsigned int, int, vm_offset_t *);
-static void pmap_release_pte_page(vm_page_t);
+static vm_page_t pmap_alloc_pte_page(unsigned int index, int req);
+static void pmap_grow_pte_page_cache(void);
 
 #ifdef SMP
 static void pmap_invalidate_page_action(void *arg);
@@ -196,10 +196,6 @@ static void pmap_invalidate_all_action(v
 static void pmap_update_page_action(void *arg);
 #endif
 
-static void pmap_ptpgzone_dtor(void *mem, int size, void *arg);
-static void *pmap_ptpgzone_allocf(uma_zone_t, int, u_int8_t *, int);
-static uma_zone_t ptpgzone;
-
 #if !defined(__mips_n64)
 struct local_sysmaps {
 	vm_offset_t base;
@@ -539,10 +535,6 @@ pmap_init(void)
 	pv_entry_max = PMAP_SHPGPERPROC * maxproc + cnt.v_page_count;
 	pv_entry_high_water = 9 * (pv_entry_max / 10);
 	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
-
-	ptpgzone = uma_zcreate("PT ENTRY", PAGE_SIZE, NULL, pmap_ptpgzone_dtor,
-	    NULL, NULL, PAGE_SIZE - 1, UMA_ZONE_NOFREE | UMA_ZONE_ZINIT);
-	uma_zone_set_allocf(ptpgzone, pmap_ptpgzone_allocf);
 }
 
 /***************************************************
@@ -882,12 +874,8 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_pa
 		/*
 		 * If the page is finally unwired, simply free it.
 		 */
+		vm_page_free_zero(m);
 		atomic_subtract_int(&cnt.v_wire_count, 1);
-		PMAP_UNLOCK(pmap);
-		vm_page_unlock_queues();
-		pmap_release_pte_page(m);
-		vm_page_lock_queues();
-		PMAP_LOCK(pmap);
 		return (1);
 	}
 
@@ -947,95 +935,30 @@ pmap_pinit0(pmap_t pmap)
 }
 
 static void
-pmap_ptpgzone_dtor(void *mem, int size, void *arg)
+pmap_grow_pte_page_cache()
 {
-#ifdef INVARIANTS
-	static char zeropage[PAGE_SIZE];
-
-	KASSERT(size == PAGE_SIZE,
-	    ("pmap_ptpgzone_dtor: invalid size %d", size));
-	KASSERT(bcmp(mem, zeropage, PAGE_SIZE) == 0,
-	    ("pmap_ptpgzone_dtor: freeing a non-zeroed page"));
-#endif
+	vm_contig_grow_cache(3, 0, MIPS_KSEG0_LARGEST_PHYS);
 }
 
-static void *
-pmap_ptpgzone_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
-{
-	vm_page_t m;
-	vm_paddr_t paddr;
-	int tries;
-
-	KASSERT(bytes == PAGE_SIZE,
-	    ("pmap_ptpgzone_allocf: invalid allocation size %d", bytes));
-
-	*flags = UMA_SLAB_PRIV;
-	tries = 0;
-retry:
-	m = vm_phys_alloc_contig(1, 0, MIPS_KSEG0_LARGEST_PHYS,
-	    PAGE_SIZE, PAGE_SIZE);
-	if (m == NULL) {
-		if (tries < ((wait & M_NOWAIT) != 0 ? 1 : 3)) {
-			vm_contig_grow_cache(tries, 0, MIPS_KSEG0_LARGEST_PHYS);
-			tries++;
-			goto retry;
-		} else
-			return (NULL);
-	}
-
-	paddr = VM_PAGE_TO_PHYS(m);
-	return ((void *)MIPS_PHYS_TO_KSEG0(paddr));
-}
-
 static vm_page_t
-pmap_alloc_pte_page(pmap_t pmap, unsigned int index, int wait, vm_offset_t *vap)
+pmap_alloc_pte_page(unsigned int index, int req)
 {
-	vm_paddr_t paddr;
-	void *va;
 	vm_page_t m;
-	int locked;
 
-	locked = mtx_owned(&pmap->pm_mtx);
-	if (locked) {
-		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-		PMAP_UNLOCK(pmap);
-		vm_page_unlock_queues();
-	}
-	va = uma_zalloc(ptpgzone, wait);
-	if (locked) {
-		vm_page_lock_queues();
-		PMAP_LOCK(pmap);
-	}
-	if (va == NULL)
+	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, 0, req);
+	if (m == NULL)
 		return (NULL);
-	paddr = MIPS_KSEG0_TO_PHYS(va);
-	m = PHYS_TO_VM_PAGE(paddr);
-
-	if (!locked)
-		vm_page_lock_queues();
-	m->pindex = index;
-	m->valid = VM_PAGE_BITS_ALL;
-	m->wire_count = 1;
-	if (!locked)
-		vm_page_unlock_queues();
+	if ((m->flags & PG_ZERO) == 0)
+		pmap_zero_page(m);
 
+	m->pindex = index;
 	atomic_add_int(&cnt.v_wire_count, 1);
-	*vap = (vm_offset_t)va;
+	m->wire_count = 1;
 	return (m);
 }
 
-static void
-pmap_release_pte_page(vm_page_t m)
-{
-	void *va;
-	vm_paddr_t paddr;
-
-	paddr = VM_PAGE_TO_PHYS(m);
-	va = (void *)MIPS_PHYS_TO_KSEG0(paddr);
-	uma_zfree(ptpgzone, va);
-}
-
 /*
  * Initialize a preallocated and zeroed pmap structure,
  * such as one in a vmspace structure.
@@ -1052,10 +975,10 @@ pmap_pinit(pmap_t pmap)
 	/*
 	 * allocate the page directory page
 	 */
-	ptdpg = pmap_alloc_pte_page(pmap, NUSERPGTBLS, M_WAITOK, &ptdva);
-	if (ptdpg == NULL)
-		return (0);
+	while ((ptdpg = pmap_alloc_pte_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL)
+		pmap_grow_pte_page_cache();
 
+	ptdva = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(ptdpg));
 	pmap->pm_segtab = (pd_entry_t *)ptdva;
 	pmap->pm_active = 0;
 	pmap->pm_ptphint = NULL;
@@ -1086,15 +1009,28 @@ _pmap_allocpte(pmap_t pmap, unsigned pte
 	/*
 	 * Find or fabricate a new pagetable page
 	 */
-	m = pmap_alloc_pte_page(pmap, ptepindex, flags, &pteva);
-	if (m == NULL)
+	if ((m = pmap_alloc_pte_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) {
+		if (flags & M_WAITOK) {
+			PMAP_UNLOCK(pmap);
+			vm_page_unlock_queues();
+			pmap_grow_pte_page_cache();
+			vm_page_lock_queues();
+			PMAP_LOCK(pmap);
+		}
+
+		/*
+		 * Indicate the need to retry.  While waiting, the page
+		 * table page may have been allocated.
+		 */
 		return (NULL);
+	}
 
 	/*
 	 * Map the pagetable page into the process address space, if it
 	 * isn't already there.
 	 */
+	pteva = MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(m));
 	pmap->pm_stats.resident_count++;
 	pmap->pm_segtab[ptepindex] = (pd_entry_t)pteva;
 
@@ -1190,7 +1126,7 @@ pmap_release(pmap_t pmap)
 	ptdpg->wire_count--;
 	atomic_subtract_int(&cnt.v_wire_count, 1);
-	pmap_release_pte_page(ptdpg);
+	vm_page_free_zero(ptdpg);
 
 	PMAP_LOCK_DESTROY(pmap);
 }
@@ -1200,7 +1136,6 @@
 void
 pmap_growkernel(vm_offset_t addr)
 {
-	vm_offset_t pageva;
 	vm_page_t nkpg;
 	pt_entry_t *pte;
 	int i;
@@ -1235,14 +1170,13 @@
 		/*
 		 * This index is bogus, but out of the way
 		 */
-		nkpg = pmap_alloc_pte_page(kernel_pmap, nkpt, M_NOWAIT, &pageva);
-
+		nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT);
 		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");
 		nkpt++;
 
-		pte = (pt_entry_t *)pageva;
-		segtab_pde(kernel_segmap, kernel_vm_end) = pte;
+		pte = (pt_entry_t *)MIPS_PHYS_TO_KSEG0(VM_PAGE_TO_PHYS(nkpg));
+		segtab_pde(kernel_segmap, kernel_vm_end) = (pd_entry_t)pte;
 
 		/*
 		 * The R[4-7]?00 stores only one copy of the Global bit in

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Wed Jul 21 09:20:40 2010	(r210326)
+++ head/sys/vm/vm_page.c	Wed Jul 21 09:27:00 2010	(r210327)
@@ -1355,6 +1355,95 @@ vm_page_alloc(vm_object_t object, vm_pin
 }
 
 /*
+ * Initialize a page that has been freshly dequeued from a freelist.
+ * The caller has to drop the vnode returned, if it is not NULL.
+ *
+ * To be called with vm_page_queue_free_mtx held.
+ */
+struct vnode *
+vm_page_alloc_init(vm_page_t m)
+{
+	struct vnode *drop;
+	vm_object_t m_object;
+
+	KASSERT(m->queue == PQ_NONE,
+	    ("vm_page_alloc_init: page %p has unexpected queue %d",
+	    m, m->queue));
+	KASSERT(m->wire_count == 0,
+	    ("vm_page_alloc_init: page %p is wired", m));
+	KASSERT(m->hold_count == 0,
+	    ("vm_page_alloc_init: page %p is held", m));
+	KASSERT(m->busy == 0,
+	    ("vm_page_alloc_init: page %p is busy", m));
+	KASSERT(m->dirty == 0,
+	    ("vm_page_alloc_init: page %p is dirty", m));
+	KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
+	    ("vm_page_alloc_init: page %p has unexpected memattr %d",
+	    m, pmap_page_get_memattr(m)));
+	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+	drop = NULL;
+	if ((m->flags & PG_CACHED) != 0) {
+		m->valid = 0;
+		m_object = m->object;
+		vm_page_cache_remove(m);
+		if (m_object->type == OBJT_VNODE &&
+		    m_object->cache == NULL)
+			drop = m_object->handle;
+	} else {
+		KASSERT(VM_PAGE_IS_FREE(m),
+		    ("vm_page_alloc_init: page %p is not free", m));
+		KASSERT(m->valid == 0,
+		    ("vm_page_alloc_init: free page %p is valid", m));
+		cnt.v_free_count--;
+	}
+	if (m->flags & PG_ZERO)
+		vm_page_zero_count--;
+	/* Don't clear the PG_ZERO flag; we'll need it later. */
+	m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
+	m->oflags = 0;
+	/* Unmanaged pages don't use "act_count". */
+	return (drop);
+}
+
+/*
+ *	vm_page_alloc_freelist:
+ *
+ *	Allocate a page from the specified freelist with specified order.
+ *	Only the ALLOC_CLASS values in req are honored, other request flags
+ *	are ignored.
+ */
+vm_page_t
+vm_page_alloc_freelist(int flind, int order, int req)
+{
+	struct vnode *drop;
+	vm_page_t m;
+	int page_req;
+
+	m = NULL;
+	page_req = req & VM_ALLOC_CLASS_MASK;
+	mtx_lock(&vm_page_queue_free_mtx);
+	/*
+	 * Do not allocate reserved pages unless the req has asked for it.
+	 */
+	if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
+	    (page_req == VM_ALLOC_SYSTEM &&
+	    cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
+	    (page_req == VM_ALLOC_INTERRUPT &&
+	    cnt.v_free_count + cnt.v_cache_count > 0)) {
+		m = vm_phys_alloc_freelist_pages(flind, VM_FREEPOOL_DIRECT, order);
+	}
+	if (m == NULL) {
+		mtx_unlock(&vm_page_queue_free_mtx);
+		return (NULL);
+	}
+	drop = vm_page_alloc_init(m);
+	mtx_unlock(&vm_page_queue_free_mtx);
+	if (drop)
+		vdrop(drop);
+	return (m);
+}
+
+/*
  * vm_wait:	(also see VM_WAIT macro)
  *
  *	Block until free pages are available for allocation

Modified: head/sys/vm/vm_page.h
==============================================================================
--- head/sys/vm/vm_page.h	Wed Jul 21 09:20:40 2010	(r210326)
+++ head/sys/vm/vm_page.h	Wed Jul 21 09:27:00 2010	(r210327)
@@ -262,6 +262,7 @@ extern struct vpglocks pa_lock[];
  *
  */
 
+struct vnode;
 extern int vm_page_zero_count;
 
 extern vm_page_t vm_page_array;		/* First resident page in table */
@@ -339,6 +340,8 @@ void vm_pageq_remove(vm_page_t m);
 
 void vm_page_activate (vm_page_t);
 vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_alloc_freelist(int, int, int);
+struct vnode *vm_page_alloc_init(vm_page_t);
 vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
 void vm_page_cache(vm_page_t);
 void vm_page_cache_free(vm_object_t, vm_pindex_t, vm_pindex_t);

Modified: head/sys/vm/vm_phys.c
==============================================================================
--- head/sys/vm/vm_phys.c	Wed Jul 21 09:20:40 2010	(r210326)
+++ head/sys/vm/vm_phys.c	Wed Jul 21 09:27:00 2010	(r210327)
@@ -301,49 +301,67 @@ vm_phys_add_page(vm_paddr_t pa)
 vm_page_t
 vm_phys_alloc_pages(int pool, int order)
 {
+	vm_page_t m;
+	int flind;
+
+	for (flind = 0; flind < vm_nfreelists; flind++) {
+		m = vm_phys_alloc_freelist_pages(flind, pool, order);
+		if (m != NULL)
+			return (m);
+	}
+	return (NULL);
+}
+
+/*
+ * Find and dequeue a free page on the given free list, with the
+ * specified pool and order
+ */
+vm_page_t
+vm_phys_alloc_freelist_pages(int flind, int pool, int order)
+{
 	struct vm_freelist *fl;
 	struct vm_freelist *alt;
-	int flind, oind, pind;
+	int oind, pind;
 	vm_page_t m;
 
+	KASSERT(flind < VM_NFREELIST,
+	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
 	KASSERT(pool < VM_NFREEPOOL,
-	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
+	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
 	KASSERT(order < VM_NFREEORDER,
-	    ("vm_phys_alloc_pages: order %d is out of range", order));
+	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-	for (flind = 0; flind < vm_nfreelists; flind++) {
-		fl = vm_phys_free_queues[flind][pool];
-		for (oind = order; oind < VM_NFREEORDER; oind++) {
-			m = TAILQ_FIRST(&fl[oind].pl);
+	fl = vm_phys_free_queues[flind][pool];
+	for (oind = order; oind < VM_NFREEORDER; oind++) {
+		m = TAILQ_FIRST(&fl[oind].pl);
+		if (m != NULL) {
+			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
+			fl[oind].lcnt--;
+			m->order = VM_NFREEORDER;
+			vm_phys_split_pages(m, oind, fl, order);
+			return (m);
+		}
+	}
+
+	/*
+	 * The given pool was empty.  Find the largest
+	 * contiguous, power-of-two-sized set of pages in any
+	 * pool.  Transfer these pages to the given pool, and
+	 * use them to satisfy the allocation.
+	 */
+	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
+		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
+			alt = vm_phys_free_queues[flind][pind];
+			m = TAILQ_FIRST(&alt[oind].pl);
 			if (m != NULL) {
-				TAILQ_REMOVE(&fl[oind].pl, m, pageq);
-				fl[oind].lcnt--;
+				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
+				alt[oind].lcnt--;
 				m->order = VM_NFREEORDER;
+				vm_phys_set_pool(pool, m, oind);
 				vm_phys_split_pages(m, oind, fl, order);
 				return (m);
 			}
 		}
-
-		/*
-		 * The given pool was empty.  Find the largest
-		 * contiguous, power-of-two-sized set of pages in any
-		 * pool.  Transfer these pages to the given pool, and
-		 * use them to satisfy the allocation.
-		 */
-		for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
-			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
-				alt = vm_phys_free_queues[flind][pind];
-				m = TAILQ_FIRST(&alt[oind].pl);
-				if (m != NULL) {
-					TAILQ_REMOVE(&alt[oind].pl, m, pageq);
-					alt[oind].lcnt--;
-					m->order = VM_NFREEORDER;
-					vm_phys_set_pool(pool, m, oind);
-					vm_phys_split_pages(m, oind, fl, order);
-					return (m);
-				}
-			}
-		}
 	}
 	return (NULL);
 }
@@ -592,7 +610,7 @@ vm_phys_alloc_contig(unsigned long npage
 {
 	struct vm_freelist *fl;
 	struct vm_phys_seg *seg;
-	vm_object_t m_object;
+	struct vnode *vp;
 	vm_paddr_t pa, pa_last, size;
 	vm_page_t deferred_vdrop_list, m, m_ret;
 	int flind, i, oind, order, pind;
@@ -687,50 +705,19 @@ done:
 	vm_phys_split_pages(m_ret, oind, fl, order);
 	for (i = 0; i < npages; i++) {
 		m = &m_ret[i];
-		KASSERT(m->queue == PQ_NONE,
-		    ("vm_phys_alloc_contig: page %p has unexpected queue %d",
-		    m, m->queue));
-		KASSERT(m->wire_count == 0,
-		    ("vm_phys_alloc_contig: page %p is wired", m));
-		KASSERT(m->hold_count == 0,
-		    ("vm_phys_alloc_contig: page %p is held", m));
-		KASSERT(m->busy == 0,
-		    ("vm_phys_alloc_contig: page %p is busy", m));
-		KASSERT(m->dirty == 0,
-		    ("vm_phys_alloc_contig: page %p is dirty", m));
-		KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
-		    ("vm_phys_alloc_contig: page %p has unexpected memattr %d",
-		    m, pmap_page_get_memattr(m)));
-		if ((m->flags & PG_CACHED) != 0) {
-			m->valid = 0;
-			m_object = m->object;
-			vm_page_cache_remove(m);
-			if (m_object->type == OBJT_VNODE &&
-			    m_object->cache == NULL) {
-				/*
-				 * Enqueue the vnode for deferred vdrop().
-				 *
-				 * Unmanaged pages don't use "pageq", so it
-				 * can be safely abused to construct a short-
-				 * lived queue of vnodes.
-				 */
-				m->pageq.tqe_prev = m_object->handle;
-				m->pageq.tqe_next = deferred_vdrop_list;
-				deferred_vdrop_list = m;
-			}
-		} else {
-			KASSERT(VM_PAGE_IS_FREE(m),
-			    ("vm_phys_alloc_contig: page %p is not free", m));
-			KASSERT(m->valid == 0,
-			    ("vm_phys_alloc_contig: free page %p is valid", m));
-			cnt.v_free_count--;
+		vp = vm_page_alloc_init(m);
+		if (vp != NULL) {
+			/*
+			 * Enqueue the vnode for deferred vdrop().
+			 *
+			 * Unmanaged pages don't use "pageq", so it
+			 * can be safely abused to construct a short-
+			 * lived queue of vnodes.
+			 */
+			m->pageq.tqe_prev = (void *)vp;
+			m->pageq.tqe_next = deferred_vdrop_list;
+			deferred_vdrop_list = m;
 		}
-		if (m->flags & PG_ZERO)
-			vm_page_zero_count--;
-		/* Don't clear the PG_ZERO flag; we'll need it later. */
-		m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
-		m->oflags = 0;
-		/* Unmanaged pages don't use "act_count". */
 	}
 	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
 		m = &m_ret[i];

Modified: head/sys/vm/vm_phys.h
==============================================================================
--- head/sys/vm/vm_phys.h	Wed Jul 21 09:20:40 2010	(r210326)
+++ head/sys/vm/vm_phys.h	Wed Jul 21 09:27:00 2010	(r210327)
@@ -44,6 +44,7 @@
 void vm_phys_add_page(vm_paddr_t pa);
 vm_page_t vm_phys_alloc_contig(unsigned long npages,
     vm_paddr_t low, vm_paddr_t high,
     unsigned long alignment, unsigned long boundary);
+vm_page_t vm_phys_alloc_freelist_pages(int flind, int pool, int order);
 vm_page_t vm_phys_alloc_pages(int pool, int order);
 vm_paddr_t vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment);
 void vm_phys_free_pages(vm_page_t m, int order);
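Taken together, the new allocation path is: vm_page_alloc_freelist() takes
vm_page_queue_free_mtx and applies the allocation-class checks,
vm_phys_alloc_freelist_pages() dequeues from the one requested freelist, and
vm_page_alloc_init() performs the bookkeeping that vm_phys_alloc_contig()
previously did inline.  A condensed caller sketch, modeled on the
pmap_alloc_pte_page() hunk above; the wrapper name alloc_direct_page() is
hypothetical, everything it calls is from this commit:

/* Allocate one wired, zeroed page from the direct mapped freelist. */
static vm_page_t
alloc_direct_page(unsigned int index)
{
	vm_page_t m;

	/* Order 0 means a single page; req carries the allocation class. */
	m = vm_page_alloc_freelist(VM_FREELIST_DIRECT, 0, VM_ALLOC_NORMAL);
	if (m == NULL)
		return (NULL);	/* caller may vm_contig_grow_cache() and retry */

	/* vm_page_alloc_init() preserved PG_ZERO, so zero only when needed. */
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	m->pindex = index;
	atomic_add_int(&cnt.v_wire_count, 1);
	m->wire_count = 1;
	return (m);
}

On the 32-bit kernel VM_FREELIST_DIRECT aliases VM_FREELIST_DEFAULT, the
freelist backed by KSEG0 physical memory, so every page this returns can be
reached through MIPS_PHYS_TO_KSEG0() as pmap_pinit() and _pmap_allocpte()
now do.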