Date: Tue, 14 Mar 2006 21:27:47 GMT
From: Peter Wemm <peter@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 93306 for review
Message-ID: <200603142127.k2ELRlP6089714@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=93306

Change 93306 by peter@peter_daintree on 2006/03/14 21:27:31

	Check in my WIP for half-sized pv entries, very roughly ported
	forward to 6.x at Yahoo.  Much work is still to be done, especially
	the get_pv_entry() reclaim process, which needs to be synced with
	alc's changes.

Affected files ...

.. //depot/projects/hammer/sys/amd64/amd64/pmap.c#134 edit
.. //depot/projects/hammer/sys/amd64/include/pmap.h#60 edit

Differences ...

==== //depot/projects/hammer/sys/amd64/amd64/pmap.c#134 (text+ko) ====

@@ -182,7 +182,6 @@
 /*
  * Data for the pv entry allocation mechanism
  */
-static uma_zone_t pvzone;
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 static int shpgperproc = PMAP_SHPGPERPROC;
@@ -198,7 +197,7 @@
  */
 static caddr_t crashdumpmap;
 
-static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
+static PMAP_INLINE void free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t locked_pmap);
 static void pmap_clear_ptes(vm_page_t m, long bit);
@@ -509,7 +508,7 @@
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);
 	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
-	TAILQ_INIT(&kernel_pmap->pm_pvlist);
+	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 	nkpt = NKPT;
 
 	/*
@@ -569,8 +568,6 @@
 	 * high water mark so that the system can recover from excessive
 	 * numbers of pv entries.
 	 */
-	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
-	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
 	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
 	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
 	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
@@ -1063,7 +1060,7 @@
 	PMAP_LOCK_INIT(pmap);
 	pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys);
 	pmap->pm_active = 0;
-	TAILQ_INIT(&pmap->pm_pvlist);
+	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
@@ -1100,7 +1097,7 @@
 	pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW |
 	    PG_A | PG_M;
 	pmap->pm_active = 0;
-	TAILQ_INIT(&pmap->pm_pvlist);
+	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
@@ -1437,14 +1434,51 @@
  * page management routines.
  ***************************************************/
 
+CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
+CTASSERT(_NPCM == 3);
+CTASSERT(_NPCPV == 168);
+
+static __inline struct pv_chunk *
+pv_to_chunk(pv_entry_t pv)
+{
+
+	return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
+}
+
+#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
+
+#define	PC_FREE0	0xfffffffffffffffful
+#define	PC_FREE1	0xfffffffffffffffful
+#define	PC_FREE2	0x000000fffffffffful
+
 /*
  * free the pv_entry back to the free list
  */
 static PMAP_INLINE void
-free_pv_entry(pv_entry_t pv)
+free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
+	vm_page_t m;
+	struct pv_chunk *pc;
+	int idx, field, bit;
+
 	pv_entry_count--;
-	uma_zfree(pvzone, pv);
+	pc = pv_to_chunk(pv);
+	idx = pv - &pc->pc_pventry[0];
+	field = idx / 64;
+	bit = idx % 64;
+	pc->pc_map[field] |= 1ul << bit;
+	/* move to head of list */
+	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
+	    pc->pc_map[2] != PC_FREE2)
+		return;
+	/*
+	 * Entire chunk is free; return it.  The page queues lock is
+	 * already held by our callers, so vm_page_free() is safe here.
+	 */
+	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+	vm_page_free(m);
 }
 
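A note on pv_to_chunk() above: because each pv_chunk is allocated as exactly
one page and is page-aligned, masking any interior pv_entry pointer with
~PAGE_MASK lands on the chunk header, which is how PV_PMAP() recovers the
owning pmap without keeping an 8-byte pv_pmap field in every entry.  A
standalone userland sketch of the same arithmetic (illustrative only, not
part of the patch; the 4K page size, the mock struct, and posix_memalign()
standing in for the page allocator are assumptions):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE 4096			/* amd64 base page size */
    #define PAGE_MASK (PAGE_SIZE - 1)

    struct chunk {
    	void *pc_pmap;			/* owner, stored once per page */
    	long  pc_entries[8];		/* stand-in for pc_pventry[] */
    };

    /* Same trick as pv_to_chunk(): round the pointer down to its page. */
    static struct chunk *
    entry_to_chunk(long *entry)
    {
    	return ((struct chunk *)((uintptr_t)entry & ~(uintptr_t)PAGE_MASK));
    }

    int
    main(void)
    {
    	struct chunk *pc;
    	long *entry;

    	/* Page-aligned allocation models the vm_page_alloc()ed chunk. */
    	if (posix_memalign((void **)&pc, PAGE_SIZE, PAGE_SIZE) != 0)
    		return (1);
    	pc->pc_pmap = (void *)0x1234;
    	entry = &pc->pc_entries[5];
    	printf("owner: %p\n", entry_to_chunk(entry)->pc_pmap);
    	free(pc);
    	return (0);
    }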
 /*
@@ -1452,6 +1486,53 @@
  * when needed.
  */
 static pv_entry_t
+get_pv_entry(pmap_t pmap)
+{
+	static vm_pindex_t colour;
+	int bit, field;
+	pv_entry_t pv;
+	struct pv_chunk *pc;
+	vm_page_t m;
+
+	pv_entry_count++;
+	if ((pv_entry_count > pv_entry_high_water) &&
+	    (pmap_pagedaemon_waken == 0)) {
+		pmap_pagedaemon_waken = 1;
+		wakeup(&vm_pages_needed);
+	}
+	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+	if (pc != NULL) {
+		for (field = 0; field < _NPCM; field++) {
+			if (pc->pc_map[field] == 0)
+				continue;
+			bit = bsrq(pc->pc_map[field]);
+			pv = &pc->pc_pventry[field * 64 + bit];
+			pc->pc_map[field] &= ~(1ul << bit);
+			/* If this was the last free item, move chunk to tail */
+			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
+			    pc->pc_map[2] == 0) {
+				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
+				    pc_list);
+			}
+			return (pv);
+		}
+	}
+	/* No free items, allocate another chunk */
+	m = vm_page_alloc(NULL, colour, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ);
+	if (m == NULL) {
+		/* XXX reclaim not yet implemented (see change description) */
+		pv_entry_count--;
+		return (NULL);
+	}
+	colour++;
+	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
+	pc->pc_pmap = pmap;
+	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
+	pc->pc_map[1] = PC_FREE1;
+	pc->pc_map[2] = PC_FREE2;
+	pv = &pc->pc_pventry[0];
+	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+	return (pv);
+}
+#if 0
+static pv_entry_t
 get_pv_entry(pmap_t locked_pmap)
 {
 	static const struct timeval printinterval = { 60, 0 };
@@ -1535,6 +1616,7 @@
 	}
 	return (allocated_pv);
 }
+#endif
 
 static void
 pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
@@ -1543,24 +1625,16 @@
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
-		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
-			if (pmap == pv->pv_pmap && va == pv->pv_va)
-				break;
-		}
-	} else {
-		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
-			if (va == pv->pv_va)
-				break;
-		}
+	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+		if (pmap == PV_PMAP(pv) && va == pv->pv_va)
+			break;
 	}
 	KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
 	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 	m->md.pv_list_count--;
 	if (TAILQ_EMPTY(&m->md.pv_list))
 		vm_page_flag_clear(m, PG_WRITEABLE);
-	TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
-	free_pv_entry(pv);
+	free_pv_entry(pmap, pv);
 }
 
 /*
@@ -1574,11 +1648,9 @@
 
 	pv = get_pv_entry(pmap);
 	pv->pv_va = va;
-	pv->pv_pmap = pmap;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
 	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
 	m->md.pv_list_count++;
 }
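The fast path in the new get_pv_entry() above is just a find-first-set scan
over the three free-bitmap words.  A rough userland model of that scan
follows (illustrative; __builtin_ctzll() stands in for the kernel's
bsrq()/bsfq() bit-scan primitives, and the PC_FREE* values are copied from
the patch).  Running it confirms a fresh chunk hands out exactly 168 entries:

    #include <stdint.h>
    #include <stdio.h>

    #define NPCM	3

    #define PC_FREE0	0xffffffffffffffffULL
    #define PC_FREE1	0xffffffffffffffffULL
    #define PC_FREE2	0x000000ffffffffffULL	/* 168 - 128 = 40 bits */

    /* Grab a free slot: clear its bit (0 = in use), return its index. */
    static int
    alloc_slot(uint64_t map[NPCM])
    {
    	int field, bit;

    	for (field = 0; field < NPCM; field++) {
    		if (map[field] != 0) {
    			bit = __builtin_ctzll(map[field]);
    			map[field] &= ~(1ULL << bit);
    			return (field * 64 + bit);
    		}
    	}
    	return (-1);			/* chunk exhausted */
    }

    int
    main(void)
    {
    	uint64_t map[NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
    	int n = 0;

    	while (alloc_slot(map) >= 0)
    		n++;
    	printf("slots per chunk: %d\n", n);	/* prints 168 */
    	return (0);
    }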
@@ -1760,6 +1832,7 @@
 pmap_remove_all(vm_page_t m)
 {
 	register pv_entry_t pv;
+	pmap_t pmap;
 	pt_entry_t *pte, tpte;
 	pd_entry_t ptepde;
 
@@ -1774,12 +1847,13 @@
 #endif
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
-		PMAP_LOCK(pv->pv_pmap);
-		pv->pv_pmap->pm_stats.resident_count--;
-		pte = pmap_pte_pde(pv->pv_pmap, pv->pv_va, &ptepde);
+		pmap = PV_PMAP(pv);
+		PMAP_LOCK(pmap);
+		pmap->pm_stats.resident_count--;
+		pte = pmap_pte_pde(pmap, pv->pv_va, &ptepde);
 		tpte = pte_load_clear(pte);
 		if (tpte & PG_W)
-			pv->pv_pmap->pm_stats.wired_count--;
+			pmap->pm_stats.wired_count--;
 		if (tpte & PG_A)
 			vm_page_flag_set(m, PG_REFERENCED);
 
@@ -1793,13 +1867,12 @@
 			if (pmap_track_modified(pv->pv_va))
 				vm_page_dirty(m);
 		}
-		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
-		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
+		pmap_invalidate_page(pmap, pv->pv_va);
 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
 		m->md.pv_list_count--;
-		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, ptepde);
-		PMAP_UNLOCK(pv->pv_pmap);
-		free_pv_entry(pv);
+		pmap_unuse_pt(pmap, pv->pv_va, ptepde);
+		PMAP_UNLOCK(pmap);
+		free_pv_entry(pmap, pv);
 	}
 	vm_page_flag_clear(m, PG_WRITEABLE);
 }
@@ -2563,7 +2636,7 @@
 
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
-		if (pv->pv_pmap == pmap) {
+		if (PV_PMAP(pv) == pmap) {
 			return TRUE;
 		}
 		loops++;
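The pmap_remove_pages() rework in the hunks below walks every slot of every
chunk and treats a clear bitmap bit as "in use" (the inverse of the PC_FREE*
masks).  As a quick worked example of the index arithmetic it relies on:
slot 100 maps to pc_map[100 / 64] = word 1, bit 100 % 64 = 36.  A tiny
illustrative check (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
    	uint64_t map[3] = { 0, 0, 0 };	/* all-zero: every slot in use */
    	int idx, field, bit, inuse = 0;

    	for (idx = 0; idx < 168; idx++) {
    		field = idx / 64;
    		bit = idx % 64;
    		if ((map[field] & (uint64_t)1 << bit) == 0)
    			inuse++;		/* clear bit = in use */
    	}
    	printf("in-use slots: %d\n", inuse);	/* prints 168 */
    	return (0);
    }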
@@ -2573,7 +2646,6 @@
 	return (FALSE);
 }
 
-#define PMAP_REMOVE_PAGES_CURPROC_ONLY
 /*
  * Remove all pages from specified address space
  * this aids process exit speeds.  Also, this code
@@ -2589,73 +2661,97 @@
 {
 	pt_entry_t *pte, tpte;
 	vm_page_t m;
-	pv_entry_t pv, npv;
+	pv_entry_t pv;
+	struct pv_chunk *pc, *npc;
+	int field, idx;
+	int64_t bit;
+	int allfree, didfree;
 
-#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
 	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
 		printf("warning: pmap_remove_pages called with non-current pmap\n");
 		return;
 	}
-#endif
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
-	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
+	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
+		allfree = 1;
+		didfree = 0;
+		/*
+		 * If only we could eliminate the sva/eva tests, and define
+		 * pmap_remove_pages() to simply remove *ALL* user pages, we
+		 * could make it faster here.  E.g., replace the for() loop
+		 * with bsrq() and other algorithm changes.
+		 */
+		for (idx = 0; idx < _NPCPV; idx++) {
+			field = idx / 64;
+			bit = idx % 64;
+			if ((pc->pc_map[field] & 1ul << bit) == 0) { /* inuse */
+				pv = &pc->pc_pventry[idx];
 
-		if (pv->pv_va >= eva || pv->pv_va < sva) {
-			npv = TAILQ_NEXT(pv, pv_plist);
-			continue;
-		}
+				if (pv->pv_va >= eva || pv->pv_va < sva) {
+					allfree = 0;
+					continue;
+				}
 
-#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
-		pte = vtopte(pv->pv_va);
-#else
-		pte = pmap_pte(pmap, pv->pv_va);
-#endif
-		tpte = *pte;
+				pte = vtopte(pv->pv_va);
+				tpte = *pte;
 
-		if (tpte == 0) {
-			printf("TPTE at %p IS ZERO @ VA %08lx\n",
-			    pte, pv->pv_va);
-			panic("bad pte");
-		}
+				if (tpte == 0) {
+					printf(
+					    "TPTE at %p IS ZERO @ VA %08lx\n",
+					    pte, pv->pv_va);
+					panic("bad pte");
+				}
 
 /*
  * We cannot remove wired pages from a process' mapping at this time
  */
-		if (tpte & PG_W) {
-			npv = TAILQ_NEXT(pv, pv_plist);
-			continue;
-		}
+				if (tpte & PG_W)
+					continue;
+
+				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+				KASSERT(m->phys_addr == (tpte & PG_FRAME),
+				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
+				    m, (uintmax_t)m->phys_addr,
+				    (uintmax_t)tpte));
 
-		m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
-		KASSERT(m->phys_addr == (tpte & PG_FRAME),
-		    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
-		    m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
+				KASSERT(m < &vm_page_array[vm_page_array_size],
+				    ("pmap_remove_pages: bad tpte %#jx",
+				    (uintmax_t)tpte));
 
-		KASSERT(m < &vm_page_array[vm_page_array_size],
-		    ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
+				pmap->pm_stats.resident_count--;
 
-		pmap->pm_stats.resident_count--;
+				pte_clear(pte);
 
-		pte_clear(pte);
+				/*
+				 * Update the vm_page_t clean/reference bits.
+				 */
+				if (tpte & PG_M)
+					vm_page_dirty(m);
 
-		/*
-		 * Update the vm_page_t clean and reference bits.
-		 */
-		if (tpte & PG_M) {
-			vm_page_dirty(m);
+				/* Mark free */
+				didfree = 1;
+				pc->pc_map[field] |= 1ul << bit;
+				m->md.pv_list_count--;
+				TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+				if (TAILQ_EMPTY(&m->md.pv_list))
+					vm_page_flag_clear(m, PG_WRITEABLE);
+				pmap_unuse_pt(pmap, pv->pv_va,
+				    *vtopde(pv->pv_va));
+			}
+		}
+		if (allfree) {
+			/* page queues lock is already held above */
+			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+			m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+			vm_page_free(m);
+		} else if (didfree) {
+			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		}
-
-		npv = TAILQ_NEXT(pv, pv_plist);
-		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
-
-		m->md.pv_list_count--;
-		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-		if (TAILQ_EMPTY(&m->md.pv_list))
-			vm_page_flag_clear(m, PG_WRITEABLE);
-
-		pmap_unuse_pt(pmap, pv->pv_va, *vtopde(pv->pv_va));
-		free_pv_entry(pv);
 	}
 	pmap_invalidate_all(pmap);
 	PMAP_UNLOCK(pmap);
@@ -2673,6 +2769,7 @@
 {
 	pv_entry_t pv;
 	pt_entry_t *pte;
+	pmap_t pmap;
 	boolean_t rv;
 
 	rv = FALSE;
@@ -2688,10 +2785,11 @@
 		 */
 		if (!pmap_track_modified(pv->pv_va))
 			continue;
-		PMAP_LOCK(pv->pv_pmap);
-		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+		pmap = PV_PMAP(pv);
+		PMAP_LOCK(pmap);
+		pte = pmap_pte(pmap, pv->pv_va);
 		rv = (*pte & PG_M) != 0;
-		PMAP_UNLOCK(pv->pv_pmap);
+		PMAP_UNLOCK(pmap);
 		if (rv)
 			break;
 	}
@@ -2729,6 +2827,7 @@
 pmap_clear_ptes(vm_page_t m, long bit)
 {
 	register pv_entry_t pv;
+	pmap_t pmap;
 	pt_entry_t pbits, *pte;
 
 	if ((m->flags & PG_FICTITIOUS) ||
@@ -2749,8 +2848,9 @@
 			continue;
 		}
 
-		PMAP_LOCK(pv->pv_pmap);
-		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+		pmap = PV_PMAP(pv);
+		PMAP_LOCK(pmap);
+		pte = pmap_pte(pmap, pv->pv_va);
 retry:
 		pbits = *pte;
 		if (pbits & bit) {
@@ -2764,9 +2864,9 @@
 			} else {
 				atomic_clear_long(pte, bit);
 			}
-			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+			pmap_invalidate_page(pmap, pv->pv_va);
 		}
-		PMAP_UNLOCK(pv->pv_pmap);
+		PMAP_UNLOCK(pmap);
 	}
 	if (bit == PG_RW)
 		vm_page_flag_clear(m, PG_WRITEABLE);
@@ -2805,6 +2905,7 @@
 pmap_ts_referenced(vm_page_t m)
 {
 	register pv_entry_t pv, pvf, pvn;
+	pmap_t pmap;
 	pt_entry_t *pte;
 	pt_entry_t v;
 	int rtval = 0;
@@ -2827,20 +2928,21 @@
 			if (!pmap_track_modified(pv->pv_va))
 				continue;
 
-			PMAP_LOCK(pv->pv_pmap);
-			pte = pmap_pte(pv->pv_pmap, pv->pv_va);
+			pmap = PV_PMAP(pv);
+			PMAP_LOCK(pmap);
+			pte = pmap_pte(pmap, pv->pv_va);
 			if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
 				atomic_clear_long(pte, PG_A);
-				pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+				pmap_invalidate_page(pmap, pv->pv_va);
 				rtval++;
 				if (rtval > 4) {
-					PMAP_UNLOCK(pv->pv_pmap);
+					PMAP_UNLOCK(pmap);
 					break;
 				}
 			}
-			PMAP_UNLOCK(pv->pv_pmap);
+			PMAP_UNLOCK(pmap);
 		} while ((pv = pvn) != NULL && pv != pvf);
 	}
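One more note on the teardown paths above: a chunk's backing page is handed
back to the VM only when its bitmap matches the PC_FREE* masks exactly
(free_pv_entry()) or when allfree survives the whole scan
(pmap_remove_pages()); a partially freed chunk is instead moved to the head
of pm_pvchunk so subsequent get_pv_entry() calls refill it before any new
page is allocated.  A minimal standalone version of the emptiness test
(illustrative, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define PC_FREE0	0xffffffffffffffffULL
    #define PC_FREE1	0xffffffffffffffffULL
    #define PC_FREE2	0x000000ffffffffffULL

    /* True when all 168 slots are free and the page can go back. */
    static int
    chunk_is_empty(const uint64_t map[3])
    {
    	return (map[0] == PC_FREE0 && map[1] == PC_FREE1 &&
    	    map[2] == PC_FREE2);
    }

    int
    main(void)
    {
    	uint64_t map[3] = { PC_FREE0, PC_FREE1, PC_FREE2 };

    	printf("%d\n", chunk_is_empty(map));	/* 1: free the page */
    	map[1] &= ~(1ULL << 7);			/* take one slot */
    	printf("%d\n", chunk_is_empty(map));	/* 0: keep the chunk */
    	return (0);
    }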
==== //depot/projects/hammer/sys/amd64/include/pmap.h#60 (text+ko) ====

@@ -222,6 +222,7 @@
  * Pmap stuff
  */
 struct pv_entry;
+struct pv_chunk;
 
 struct md_page {
 	int pv_list_count;
@@ -231,7 +232,7 @@
 struct pmap {
 	struct mtx pm_mtx;
 	pml4_entry_t *pm_pml4;	/* KVA of level 4 page table */
-	TAILQ_HEAD(,pv_entry) pm_pvlist;	/* list of mappings in pmap */
+	TAILQ_HEAD(,pv_chunk) pm_pvchunk;	/* list of pv entry chunks */
 	u_int pm_active;	/* active on cpus */
 	/* spare u_int here due to padding */
 	struct pmap_statistics pm_stats;	/* pmap statistics */
@@ -260,12 +261,24 @@
  * mappings of that page.  An entry is a pv_entry_t, the list is pv_table.
  */
 typedef struct pv_entry {
-	pmap_t pv_pmap;		/* pmap where mapping lies */
 	vm_offset_t pv_va;	/* virtual address for mapping */
 	TAILQ_ENTRY(pv_entry) pv_list;
-	TAILQ_ENTRY(pv_entry) pv_plist;
 } *pv_entry_t;
 
+/*
+ * pv_entries are allocated in chunks per-process.  This avoids the
+ * need to track per-pmap assignments.
+ */
+#define	_NPCM	3
+#define	_NPCPV	168
+struct pv_chunk {
+	pmap_t			pc_pmap;
+	TAILQ_ENTRY(pv_chunk)	pc_list;
+	uint64_t		pc_map[_NPCM];	/* bitmap; 1 = free */
+	uint64_t		pc_spare[2];
+	struct pv_entry		pc_pventry[_NPCPV];
+};
+
 #ifdef _KERNEL
 
 #define NPPROVMTRR	8
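For reviewers checking the CTASSERTs in pmap.c against this layout: the
chunk header is 8 (pc_pmap) + 16 (pc_list) + 24 (pc_map) + 16 (pc_spare) =
64 bytes, each slimmed pv_entry is 8 (pv_va) + 16 (pv_list) = 24 bytes, and
64 + 168 * 24 = 4096 = PAGE_SIZE.  168 slots also explain the free masks:
two full 64-bit words plus the low 40 bits of the third.  A userland mock of
the layout math (the struct shapes mirror the patch but are assumptions for
illustration; LP64 sizes expected):

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/queue.h>

    #define _NPCM	3
    #define _NPCPV	168

    struct pv_entry_mock {
    	uint64_t pv_va;
    	TAILQ_ENTRY(pv_entry_mock) pv_list;	/* two pointers: 16 bytes */
    };

    struct pv_chunk_mock {
    	void *pc_pmap;
    	TAILQ_ENTRY(pv_chunk_mock) pc_list;
    	uint64_t pc_map[_NPCM];
    	uint64_t pc_spare[2];
    	struct pv_entry_mock pc_pventry[_NPCPV];
    };

    int
    main(void)
    {
    	/* Expect 24 and 4096 on LP64, matching the CTASSERTs. */
    	printf("sizeof(pv_entry) = %zu\n", sizeof(struct pv_entry_mock));
    	printf("sizeof(pv_chunk) = %zu\n", sizeof(struct pv_chunk_mock));
    	return (0);
    }

The pc_spare words are what keep the header at 64 bytes; without them the
chunk would fall 16 bytes short of a full page.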