Date: Wed, 1 Mar 2006 08:07:03 GMT
From: Kip Macy <kmacy@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 92587 for review
Message-ID: <200603010807.k21873hq008560@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=92587

Change 92587 by kmacy@kmacy_storage:sun4v_work on 2006/03/01 08:06:57

        implement pmap_enter as well as all of the functions that it depends on
        simplify pmap_kextract with help of the new tsb_lookup_tte

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#12 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#12 (text+ko) ====

@@ -34,6 +34,7 @@
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
+#include <vm/uma.h>
 
 #include <machine/cpu.h>
 #include <machine/cache.h>
@@ -68,7 +69,6 @@
 struct msgbuf *msgbufp;
 vm_paddr_t msgbuf_phys;
 
-
 /*
  * Map of physical memory reagions.
  */
@@ -95,18 +95,22 @@
 
 
 
+#ifndef PMAP_SHPGPERPROC
+#define PMAP_SHPGPERPROC 200
+#endif
 /*
- * Kernel pmap.
+ * Data for the pv entry allocation mechanism
  */
-struct pmap kernel_pmap_store;
+static uma_zone_t pvzone;
+static struct vm_object pvzone_obj;
+static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 
 /*
- * Kernel TSBs
+ * Kernel pmap.
  */
-#define TSB8K_INDEX 0
-#define TSB4M_INDEX 1
+struct pmap kernel_pmap_store;
 
-static hv_tsb_info_t kernel_td[MAX_TSB_INFO];
+hv_tsb_info_t kernel_td[MAX_TSB_INFO];
 
 
 /*
@@ -136,7 +140,14 @@
 
 #define UNIMPLEMENTED panic("%s not implemented", __FUNCTION__)
 
+static void free_pv_entry(pv_entry_t pv);
+static pv_entry_t get_pv_entry(pmap_t locked_pmap);
+
 static void pmap_scrub_pages(vm_paddr_t pa, int64_t size);
+static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
+static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va);
+
+
 /*
  * Quick sort callout for comparing memory regions.
  */
@@ -173,6 +184,109 @@
         return (0);
 }
 
+
+static __inline void
+free_pv_entry(pv_entry_t pv)
+{
+        pv_entry_count--;
+        uma_zfree(pvzone, pv);
+}
+
+/*
+ * get a new pv_entry, allocating a block from the system
+ * when needed.
+ */
+static pv_entry_t
+get_pv_entry(pmap_t locked_pmap)
+{
+        static const struct timeval printinterval = { 60, 0 };
+        static struct timeval lastprint;
+        struct vpgqueues *vpq;
+        pmap_t pmap;
+        pv_entry_t allocated_pv, next_pv, pv;
+        vm_offset_t va;
+        vm_page_t m;
+
+        PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+        mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+        allocated_pv = uma_zalloc(pvzone, M_NOWAIT);
+        if (allocated_pv != NULL) {
+                pv_entry_count++;
+                if (pv_entry_count > pv_entry_high_water)
+                        pagedaemon_wakeup();
+                else
+                        return (allocated_pv);
+        }
+
+        /*
+         * Reclaim pv entries: At first, destroy mappings to inactive
+         * pages.  After that, if a pv entry is still needed, destroy
+         * mappings to active pages.
+         */
+        if (ratecheck(&lastprint, &printinterval))
+                printf("Approaching the limit on PV entries, "
+                    "increase the vm.pmap.shpgperproc tunable.\n");
+        vpq = &vm_page_queues[PQ_INACTIVE];
+retry:
+        sched_pin();
+        TAILQ_FOREACH(m, &vpq->pl, pageq) {
+                if (m->hold_count || m->busy || (m->flags & PG_BUSY))
+                        continue;
+                TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
+                        UNIMPLEMENTED;
+                        va = pv->pv_va;
+                        pmap = pv->pv_pmap;
+                        /* Avoid deadlock and lock recursion. */
+                        if (pmap > locked_pmap)
+                                PMAP_LOCK(pmap);
+                        else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+                                continue;
+                        pmap->pm_stats.resident_count--;
+#ifdef notyet
+                        pte = pmap_pte_quick(pmap, va);
+                        tpte = pte_load_clear(pte);
+                        KASSERT((tpte & PG_W) == 0,
+                            ("get_pv_entry: wired pte %#jx", (uintmax_t)tpte));
+                        if (tpte & PG_A)
+                                vm_page_flag_set(m, PG_REFERENCED);
+                        if (tpte & PG_M) {
+                                KASSERT((tpte & PG_RW),
+                                    ("get_pv_entry: modified page not writable: va: %#x, pte: %#jx",
+                                    va, (uintmax_t)tpte));
+                                if (pmap_track_modified(va))
+                                        vm_page_dirty(m);
+                        }
+#endif
+                        pmap_invalidate_page(pmap, va);
+                        TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+                        TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+                        if (TAILQ_EMPTY(&m->md.pv_list))
+                                vm_page_flag_clear(m, PG_WRITEABLE);
+                        m->md.pv_list_count--;
+#ifdef notyet
+                        pmap_unuse_pt(pmap, va);
+#endif
+
+                        if (pmap != locked_pmap)
+                                PMAP_UNLOCK(pmap);
+                        if (allocated_pv == NULL)
+                                allocated_pv = pv;
+                        else
+                                free_pv_entry(pv);
+                }
+        }
+        sched_unpin();
+        if (allocated_pv == NULL) {
+                if (vpq == &vm_page_queues[PQ_INACTIVE]) {
+                        vpq = &vm_page_queues[PQ_ACTIVE];
+                        goto retry;
+                }
+                panic("get_pv_entry: increase the vm.pmap.shpgperproc tunable");
+        }
+        return (allocated_pv);
+}
+
 /*
  * Allocate a physical page of memory directly from the phys_avail map.
  * Can only be called from pmap_bootstrap before avail start and end are
@@ -184,7 +298,6 @@
         vm_paddr_t pa;
         int i;
 
-        printf("looking for size %lx\n", size);
         size = round_page(size);
 
         for (i = 0; phys_avail[i + 1] != 0; i += 2) {
@@ -306,6 +419,7 @@
         kernel_td[TSB8K_INDEX].hvtsb_pa = pa;
 
         tsb_4m_size = virtsz >> (PAGE_SHIFT_4M - TTE_SHIFT);
+
         pa = pmap_bootstrap_alloc(tsb_4m_size);
 
         kernel_td[TSB4M_INDEX].hvtsb_idxpgsz = TTE4M;
@@ -320,7 +434,6 @@
 
         pmap_scrub_pages(kernel_td[TSB4M_INDEX].hvtsb_pa, tsb_4m_size);
 
-
         /*
          * Set up TSB descriptors for the hypervisor
          *
@@ -332,7 +445,6 @@
         /*
          * allocate MMU fault status areas for all CPUS
         */
-        printf("allocate fault status area\n");
         mmu_fault_status_area = pmap_bootstrap_alloc(MMFSA_SIZE*MAXCPU);
 
         /*
@@ -365,7 +477,6 @@
         virtual_avail += KSTACK_GUARD_PAGES * PAGE_SIZE;
         kstack0 = virtual_avail;
         virtual_avail += KSTACK_PAGES * PAGE_SIZE;
-        printf("setting ttes\n");
         for (i = 0; i < KSTACK_PAGES; i++) {
                 pa = kstack0_phys + i * PAGE_SIZE;
                 va = kstack0 + i * PAGE_SIZE;
@@ -405,18 +516,21 @@
                     translations[i].om_start > VM_MAX_PROM_ADDRESS)
                         continue;
 #endif
-                printf("om_size: %ld om_start: %lx om_tte: %lx\n", translations[i].om_size,
+                printf("om_size=%ld om_start=%lx om_tte=%lx\n", translations[i].om_size,
                     translations[i].om_start, translations[i].om_tte);
-                if (translations[i].om_size == PAGE_SIZE_4M)
+                if (translations[i].om_size == PAGE_SIZE_4M) {
+                        tsb_assert_invalid(&kernel_td[TSB4M_INDEX], translations[i].om_start);
                         tsb_set_tte(&kernel_td[TSB4M_INDEX], translations[i].om_start,
                             TTE_GET_PA(translations[i].om_tte), TTE_KERNEL | VTD_4M, 0);
-                else
+                } else {
                         for (off = 0; off < translations[i].om_size; off += PAGE_SIZE) {
                                 va = translations[i].om_start + off;
                                 pa = TTE_GET_PA(translations[i].om_tte) + off;
+                                tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va);
                                 tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa,
                                     TTE_KERNEL | VTD_8K, 0);
                         }
+                }
         }
 
@@ -437,11 +551,9 @@
 
         PMAP_LOCK_INIT(kernel_pmap);
         TAILQ_INIT(&kernel_pmap->pm_pvlist);
-
-        printf("physical address of kernel_td: 0x%lx\n", vtophys((vm_offset_t)&kernel_td));
-        printf("set ctx0\n");
-        error = hv_set_ctx0(2, vtophys((vm_offset_t)&kernel_td));
-        printf("ctx0 set\n");
+        printf("physical address of kernel_td: 0x%lx\n",
+            vtophys((vm_offset_t)&kernel_td));
+
+        error = hv_set_ctx0(MAX_TSB_INFO, vtophys((vm_offset_t)&kernel_td));
         if (error != H_EOK)
                 panic("failed to set ctx0 TSBs error: %ld", error);
@@ -477,12 +589,14 @@
 pmap_clear_modify(vm_page_t m)
 {
         /* XXX Need to also clear this in the TSB if possible :-( */
+        UNIMPLEMENTED;
         tte_clear_phys_bit(m, VTD_W);
 }
 
 void
 pmap_clear_reference(vm_page_t m)
 {
+        UNIMPLEMENTED;
         tte_clear_phys_bit(m, VTD_REF);
 }
 
@@ -508,7 +622,119 @@
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
     boolean_t wired)
 {
-        UNIMPLEMENTED;
+        vm_paddr_t pa, opa;
+        uint64_t tte_data, otte_data;
+        vm_page_t om;
+        int invlva;
+#if 0
+        printf("ctx=%d va=%lx prot=%x wired=%x\n", pmap->pm_context,
+            va, prot, wired);
+#endif
+
+        vm_page_lock_queues();
+        om = NULL;
+        PMAP_LOCK(pmap);
+        sched_pin();
+
+        tte_data = pa = VM_PAGE_TO_PHYS(m);
+        otte_data = tsb_lookup_tte(va, pmap->pm_context);
+        opa = TTE_GET_PA(otte_data);
+        /*
+         * Mapping has not changed, must be protection or wiring change.
+         */
+        if (pa == opa) {
+                /*
+                 * Wiring change, just update stats.  We don't worry about
+                 * wiring PT pages as they remain resident as long as there
+                 * are valid mappings in them.  Hence, if a user page is wired,
+                 * the PT page will be also.
+                 */
+                if (wired && ((otte_data & VTD_WIRED) == 0))
+                        pmap->pm_stats.wired_count++;
+                else if (!wired && (otte_data & VTD_WIRED))
+                        pmap->pm_stats.wired_count--;
+
+                /*
+                 * We might be turning off write access to the page,
+                 * so we go ahead and sense modify status.
+                 */
+                if (otte_data & VTD_MANAGED) {
+                        om = m;
+                        pa |= VTD_MANAGED;
+                }
+                goto validate;
+
+        }
+        /*
+         * Mapping has changed, invalidate old range and fall through to
+         * handle validating new mapping.
+         */
+        if (opa) {
+                if (otte_data & VTD_W)
+                        pmap->pm_stats.wired_count--;
+                if (otte_data & VTD_MANAGED) {
+                        om = PHYS_TO_VM_PAGE(opa);
+                        pmap_remove_entry(pmap, om, va);
+                }
+        } else
+                pmap->pm_stats.resident_count++;
+
+        /*
+         * Enter on the PV list if part of our managed memory.
+         */
+        if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
+                pmap_insert_entry(pmap, va, m);
+                tte_data |= VTD_MANAGED;
+        }
+        /*
+         * Increment counters
+         */
+        if (wired)
+                pmap->pm_stats.wired_count++;
+
+validate:
+        /*
+         * Now validate mapping with desired protection/wiring.
+         */
+        if ((prot & VM_PROT_WRITE) != 0)
+                tte_data |= (VTD_W|VTD_WR_PERM); /* XXX need to handle modify */
+        if ((prot & VM_PROT_EXECUTE) != 0)
+                tte_data |= VTD_X;
+        if (wired)
+                tte_data |= VTD_WIRED;
+        if (pmap == kernel_pmap)
+                tte_data |= TTE_KERNEL_MINFLAGS;
+
+        if ((otte_data & ~(VTD_W|VTD_REF)) != tte_data) {
+                if (otte_data & VTD_V) {
+                        invlva = FALSE;
+                        tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, tte_data,
+                            pmap->pm_context);
+                        if (otte_data & VTD_REF) {
+                                if (otte_data & VTD_MANAGED)
+                                        vm_page_flag_set(om, PG_REFERENCED);
+                                if (opa != pa)
+                                        invlva = TRUE;
+                        }
+                        if (otte_data & VTD_W) {
+                                if ((otte_data & VTD_MANAGED) &&
+                                    pmap_track_modified(pmap, va))
+                                        vm_page_dirty(om);
+                                if ((prot & VM_PROT_WRITE) == 0)
+                                        invlva = TRUE;
+                        }
+                        if (invlva)
+                                pmap_invalidate_page(pmap, va);
+                } else
+                        tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, tte_data,
+                            pmap->pm_context);
+        }
+
+
+        sched_unpin();
+        vm_page_unlock_queues();
+        PMAP_UNLOCK(pmap);
+
 }
 
 vm_page_t
@@ -556,9 +782,42 @@
 pmap_init(void)
 {
         /* allocate pv_entry zones */
-        return;
+        int shpgperproc = PMAP_SHPGPERPROC;
+
+        /*
+         * Initialize the address space (zone) for the pv entries.  Set a
+         * high water mark so that the system can recover from excessive
+         * numbers of pv entries.
+         */
+        pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
+            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+        TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
+        pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
+        TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
+        pv_entry_high_water = 9 * (pv_entry_max / 10);
+        uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
+
 }
 
+/*
+ * Create a pv entry for page at pa for
+ * (pmap, va).
+ */
+static void
+pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+        pv_entry_t pv;
+
+        pv = get_pv_entry(pmap);
+        pv->pv_va = va;
+        pv->pv_pmap = pmap;
+
+        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+        mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+        TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
+        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+        m->md.pv_list_count++;
+}
 
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
@@ -578,8 +837,14 @@
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-
-        printf("%s unimplemented\n", __FUNCTION__);
+        vm_offset_t tva;
+        printf("pmap_invalidate_range(sva=%lx, eva=%lx)\n", sva, eva);
+        /* XXX SUN4V_FIXME - oversimplified logic */
+        if (((sva & PAGE_MASK_4M) != 0) || ((eva & PAGE_MASK_4M) != 0)) {
+                for (tva = sva; tva < eva; tva += PAGE_SIZE_8K)
+                        invlpg(tva, pmap->pm_context);
+        } else
+                UNIMPLEMENTED;
 }
 
 
@@ -596,6 +861,8 @@
 boolean_t
 pmap_is_modified(vm_page_t m)
 {
+        UNIMPLEMENTED;
+        /* Not properly handled yet */
         return tte_get_phys_bit(m, VTD_W);
 }
 
@@ -612,9 +879,7 @@
 void
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
-        printf("pmap_kentering\n");
         tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, TTE_KERNEL | VTD_8K, 0);
-        printf("pmap_kentered\n");
 }
 
 /*
@@ -627,20 +892,15 @@
 {
         uint64_t tte_data;
         vm_paddr_t pa;
-        /*
-         * check 4M TSB
-         */
-        tte_data = tsb_get_tte(&kernel_td[TSB4M_INDEX], va, 0);
-        pa = TTE_GET_PA(tte_data) | (va & PAGE_MASK_4M);
-        if (TTE_GET_PA(tte_data) != 0)
-                goto done;
-        /*
-         * check 8k TSB
-         */
-        tte_data = tsb_get_tte(&kernel_td[TSB8K_INDEX], va, 0);
-        pa = TTE_GET_PA(tte_data)| (va & PAGE_MASK);
-
-done:
+
+        pa = 0;
+#if 0
+        printf("tte_data=%lx TTE_GET_PA(tte_data)=%lx (va & TTE_GET_PAGE_MASK(tte_data))=%lx\n",
+            tsb_lookup_tte(va, 0), TTE_GET_PA(tte_data), (va & TTE_GET_PAGE_MASK(tte_data)));
+#endif
+        if ((tte_data = tsb_lookup_tte(va, 0)) != 0)
+                pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
+
         return pa;
 }
 
@@ -748,7 +1008,7 @@
 {
         if ((prot & VM_PROT_WRITE) == 0) {
                 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
-                        tte_clear_phys_bit(m, VTD_SW|VTD_W);
+                        tte_clear_phys_bit(m, VTD_WR_PERM|VTD_W);
                 } else {
                         pmap_remove_all(m);
                 }
@@ -821,6 +1081,7 @@
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
+        UNIMPLEMENTED;
 #ifdef notyet
         vm_offset_t pdnxt;
         pd_entry_t ptpaddr;
@@ -1048,6 +1309,33 @@
 
         UNIMPLEMENTED;
 }
 
+static void
+pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
+{
+        pv_entry_t pv;
+
+        PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+        mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+        if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
+                TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+                        if (pmap == pv->pv_pmap && va == pv->pv_va)
+                                break;
+                }
+        } else {
+                TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
+                        if (va == pv->pv_va)
+                                break;
+                }
+        }
+        KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
+        TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+        m->md.pv_list_count--;
+        if (TAILQ_EMPTY(&m->md.pv_list))
+                vm_page_flag_clear(m, PG_WRITEABLE);
+        TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+        free_pv_entry(pv);
+}
+
 void
 pmap_remove_pages(pmap_t pmap, vm_offset_t start, vm_offset_t end)
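
The pmap_kextract() hunk above replaces the separate 4M- and 8K-TSB probes (and the done: label) with a single tsb_lookup_tte() call followed by an OR of the in-page offset.  The user-space sketch below models only that control flow; the fake_tte structure, tsb_lookup_tte_model() and the constants are illustrative stand-ins invented here, not the sun4v kernel definitions, so read it as a sketch of the idea rather than the actual code.

/*
 * Standalone model of the simplified pmap_kextract() flow: one lookup,
 * then physical base OR'ed with the offset within the mapping's page.
 * All names and values below are fabricated for illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define FAKE_PAGE_SHIFT_4M      22

struct fake_tte {
        uint64_t pa;            /* physical base of the mapping */
        int      shift;         /* page shift of the mapping (13, 22, ...) */
        int      valid;         /* non-zero if a translation exists */
};

/* Stand-in for tsb_lookup_tte(): consult the kernel TSBs exactly once. */
static struct fake_tte
tsb_lookup_tte_model(uint64_t va)
{
        /* Pretend the first 4MB of KVA is covered by one 4M mapping. */
        if (va < (1UL << FAKE_PAGE_SHIFT_4M))
                return ((struct fake_tte){ 0x40000000UL, FAKE_PAGE_SHIFT_4M, 1 });
        return ((struct fake_tte){ 0, 0, 0 });  /* no translation */
}

/* Model of the new pmap_kextract(): pa = TTE pa | (va & page mask). */
static uint64_t
pmap_kextract_model(uint64_t va)
{
        struct fake_tte tte = tsb_lookup_tte_model(va);

        if (!tte.valid)
                return (0);
        return (tte.pa | (va & ((1UL << tte.shift) - 1)));
}

int
main(void)
{
        /* 0x123456 lies inside the fake 4M mapping -> 0x40123456. */
        printf("va 0x123456 -> pa 0x%llx\n",
            (unsigned long long)pmap_kextract_model(0x123456));
        return (0);
}

Once the lookup returns the matching TTE, the page size falls out of the TTE itself (TTE_GET_PAGE_MASK() in the patch), which is what lets the duplicated per-TSB probe logic collapse into a single path.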