Date: Sat, 4 Mar 2006 08:44:25 GMT
From: Kip Macy <kmacy@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 92714 for review
Message-ID: <200603040844.k248iPiv019247@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=92714

Change 92714 by kmacy@kmacy_storage:sun4v_work on 2006/03/04 08:43:25

	Redefine pmap to be hash-table backed for user processes and
	make TSBs per-process.

Affected files ...

... //depot/projects/kmacy_sun4v/src/sys/conf/files.sun4v#7 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/include/asi.h#9 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/include/asmacros.h#4 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pcb.h#5 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pmap.h#6 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tsb.h#7 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte.h#6 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/locore.S#6 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#13 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/support.S#7 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tsb.c#6 edit
... //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#1 add

Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/conf/files.sun4v#7 (text+ko) ==== @@ -74,6 +74,7 @@ sun4v/sun4v/swtch.S standard sun4v/sun4v/tsb.c standard sun4v/sun4v/tte.c standard +sun4v/sun4v/tte_hash.c standard sun4v/sun4v/tick.c standard sun4v/sun4v/trap.c standard sun4v/sun4v/uio_machdep.c standard ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/asi.h#9 (text+ko) ==== @@ -124,10 +124,10 @@ #define ASI_SCRATCHPAD_6_REG 0x30 #define ASI_SCRATCHPAD_7_REG 0x38 -/* MMFSA == 0 */ + +#define SCRATCH_REG_MMFSA ASI_SCRATCHPAD_0_REG #define SCRATCH_REG_PCPU ASI_SCRATCHPAD_1_REG -#define SCRATCH_REG_PCB ASI_SCRATCHPAD_2_REG -#define SCRATCH_REG_PTD ASI_SCRATCHPAD_3_REG +#define SCRATCH_REG_HASH ASI_SCRATCHPAD_2_REG #define MMU_CID_P 0x08 #define MMU_CID_S 0x10 ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/asmacros.h#4 (text+ko) ==== @@ -135,13 +135,11 @@ mov SCRATCH_REG_PCPU, PCPU_REG; \ ldxa [%g0 + PCPU_REG]ASI_SCRATCHPAD, PCPU_REG; -#define GET_PTD_SCRATCH(reg) \ - mov SCRATCH_REG_PTD, reg; \ +#define GET_HASH_SCRATCH(reg) \ + mov SCRATCH_REG_HASH, reg; \ ldxa [%g0 + reg]ASI_SCRATCHPAD, reg; -#define SET_MMFSA_SCRATCH(reg) stxa reg, [%g0]ASI_SCRATCHPAD -#define SET_PCPU_SCRATCH stxa PCPU_REG, [%g0 + SCRATCH_REG_PCPU]ASI_SCRATCHPAD -#define SET_PTD_SCRATCH(reg) stxa reg, [%g0 + SCRATCH_REG_PTD]ASI_SCRATCHPAD +#define SET_HASH_SCRATCH(reg) stxa reg, [%g0 + SCRATCH_REG_HASH]ASI_SCRATCHPAD #define GET_PCB(reg) \ GET_PCPU_SCRATCH; \ ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pcb.h#5 (text+ko) ==== ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pmap.h#6 (text+ko) ==== @@ -50,6 +50,7 @@ typedef struct pmap *pmap_t; struct pv_entry; +struct tte_hash; struct md_page { int pv_list_count; @@ -58,12 +59,12 @@ struct pmap { - struct mtx pm_mtx; - vm_paddr_t *pm_pdir; + struct mtx pm_mtx; + struct tte_hash *pm_hash; TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ - vm_object_t pm_tsb_obj; - cpumask_t pm_active; - uint16_t 
pm_context; + struct hv_tsb_info pm_tsb; + cpumask_t pm_active; + uint16_t pm_context; struct pmap_statistics pm_stats; }; @@ -106,6 +107,10 @@ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va); void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); void pmap_invalidate_all(pmap_t pmap); +void pmap_scrub_pages(vm_paddr_t pa, int64_t size); + + + #define vtophys(va) pmap_kextract((vm_offset_t)(va)) extern struct pmap kernel_pmap_store; ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tsb.h#7 (text+ko) ==== @@ -1,59 +1,8 @@ -/*- - * Copyright (c) 1997 Berkeley Software Design, Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Berkeley Software Design Inc's name may not be used to endorse or - * promote products derived from this software without specific prior - * written permission. - * - * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * from: BSDI: pmap.v9.h,v 1.10.2.6 1999/08/23 22:18:44 cp Exp - * $FreeBSD: src/sys/sparc64/include/tsb.h,v 1.18 2003/04/08 06:35:08 jake Exp $ - */ - #ifndef _MACHINE_TSB_H_ #define _MACHINE_TSB_H_ #define MAX_TSB_INFO 2 -#define TSB_PAGES_SHIFT (4) -#define TSB_PAGES (1 << TSB_PAGES_SHIFT) -#define TSB_BSHIFT (TSB_PAGES_SHIFT + PAGE_SHIFT) -#define TSB_BSIZE (1UL << TSB_BSHIFT) -#define TSB_SIZE (TSB_BSIZE / sizeof(struct tte)) -#define TSB_BUCKET_SHIFT (2) -#define TSB_BUCKET_SIZE (1 << TSB_BUCKET_SHIFT) -#define TSB_BUCKET_ADDRESS_BITS \ - (TSB_BSHIFT - TSB_BUCKET_SHIFT - TTE_SHIFT) -#define TSB_BUCKET_MASK ((1 << TSB_BUCKET_ADDRESS_BITS) - 1) - - -#define TSB_ENTRY_SHIFT 4 /* each entry = 128 bits = 16 bytes */ -#define TSB_ENTRY_SIZE (1 << 4) -#define TSB_START_SIZE 9 -#define TSB_ENTRIES(tsbsz) (1 << (TSB_START_SIZE + tsbsz)) -#define TSB_BYTES(tsbsz) (TSB_ENTRIES(tsbsz) << TSB_ENTRY_SHIFT) -#define TSB_OFFSET_MASK(tsbsz) (TSB_ENTRIES(tsbsz) - 1) - - /* * Values for "tsb_ttesz_mask" bitmask. 
*/ @@ -74,17 +23,19 @@ struct hv_tsb_info; -typedef uint64_t tte_t; + +void tsb_assert_invalid(struct hv_tsb_info *tsb, vm_offset_t va); -void tsb_assert_invalid(hv_tsb_info_t *tsb, vm_offset_t va); +void tsb_set_tte(struct hv_tsb_info *tsb, vm_offset_t va, tte_t tte_data, uint64_t ctx); -void tsb_set_tte(struct hv_tsb_info *tsb, vm_offset_t va, vm_paddr_t pa, uint64_t flags, uint64_t ctx); +tte_t tsb_get_tte(struct hv_tsb_info *tsb, vm_offset_t va); -tte_t tsb_get_tte(struct hv_tsb_info *tsb, vm_offset_t va, uint64_t ctx); +tte_t tsb_lookup_tte(vm_offset_t va, uint64_t context); -tte_t tsb_lookup_tte(vm_offset_t va, uint64_t ctx); +void tsb_clear(struct hv_tsb_info *tsb); -void tsb_clear_tte(struct hv_tsb_info *tsb, vm_offset_t, uint64_t ctx); +void tsb_clear_tte(struct hv_tsb_info *tsb, vm_offset_t va); +void tsb_clear_range(struct hv_tsb_info *tsb, vm_offset_t sva, vm_offset_t eva); #endif /* !_MACHINE_TSB_H_ */ ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte.h#6 (text+ko) ==== @@ -111,7 +111,7 @@ #define VTD_W (1UL << 6) #define VTD_REF (1UL << 5) -#define VTD_WR_PERM (1UL << 4) +#define VTD_SW_W (1UL << 4) #define VTD_MANAGED (1UL << 58) #define VTD_WIRED (1UL << 57) #define VTD_LOCK (1UL << 56) @@ -128,8 +128,9 @@ /* * default flags for kernel pages */ -#define TTE_KERNEL VTD_V | VTD_CP | VTD_CV | VTD_P | VTD_X | VTD_W | VTD_WR_PERM | VTD_REF | VTD_WIRED +#define TTE_KERNEL VTD_V | VTD_CP | VTD_CV | VTD_P | VTD_X | VTD_W | VTD_SW_W | VTD_REF | VTD_WIRED #define TTE_KERNEL_MINFLAGS VTD_V | VTD_CP | VTD_CV | VTD_P +#define TTE_MINFLAGS VTD_V | VTD_CP | VTD_CV #define VTD_SIZE_BITS (4) #define VTD_SIZE_MASK ((1 << VTD_SIZE_BITS) - 1) @@ -227,12 +228,18 @@ struct pmap; +typedef uint64_t tte_t; + void tte_clear_phys_bit(vm_page_t m, uint64_t flags); + void tte_set_phys_bit(vm_page_t m, uint64_t flags); + boolean_t tte_get_phys_bit(vm_page_t m, uint64_t flags); void tte_clear_virt_bit(struct pmap *pmap, vm_offset_t va, uint64_t flags); + void 
tte_set_virt_bit(struct pmap *pmap, vm_offset_t va, uint64_t flags); + boolean_t tte_get_virt_bit(struct pmap *pmap, vm_offset_t va, uint64_t flags); #endif /* !_MACHINE_TTE_H_ */ ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/locore.S#6 (text+ko) ==== @@ -115,8 +115,6 @@ */ mov SCRATCH_REG_PCPU, %g1 stxa PCPU_REG, [%g1]ASI_SCRATCHPAD - mov SCRATCH_REG_PCB, %g1 - stxa %g6, [%g1]ASI_SCRATCHPAD retl nop ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#13 (text+ko) ==== @@ -46,6 +46,7 @@ #include <machine/smp.h> #include <machine/tlb.h> #include <machine/tte.h> +#include <machine/tte_hash.h> #include <machine/pcb.h> #include <machine/tsb.h> @@ -143,7 +144,6 @@ static void free_pv_entry(pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t locked_pmap); -static void pmap_scrub_pages(vm_paddr_t pa, int64_t size); static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, vm_offset_t va); @@ -202,11 +202,16 @@ static const struct timeval printinterval = { 60, 0 }; static struct timeval lastprint; struct vpgqueues *vpq; + uint64_t *tte, tte_data; pmap_t pmap; pv_entry_t allocated_pv, next_pv, pv; vm_offset_t va; vm_page_t m; + + KASSERT(locked_pmap->pm_context != 0, + ("context 0 not backed by pv_entry management")); + PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); mtx_assert(&vm_page_queue_mtx, MA_OWNED); allocated_pv = uma_zalloc(pvzone, M_NOWAIT); @@ -234,7 +239,6 @@ if (m->hold_count || m->busy || (m->flags & PG_BUSY)) continue; TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { - UNIMPLEMENTED; va = pv->pv_va; pmap = pv->pv_pmap; /* Avoid deadlock and lock recursion. 
*/ @@ -243,30 +247,29 @@ else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) continue; pmap->pm_stats.resident_count--; -#ifdef notyet - pte = pmap_pte_quick(pmap, va); - tpte = pte_load_clear(pte); - KASSERT((tpte & PG_W) == 0, - ("get_pv_entry: wired pte %#jx", (uintmax_t)tpte)); - if (tpte & PG_A) + + tte = tte_hash_lookup(pmap->pm_hash, va); + tte_data = *tte; + tte_hash_delete(pmap->pm_hash, va); + + KASSERT((tte_data & VTD_WIRED) == 0, + ("get_pv_entry: wired pte %#jx", (uintmax_t)tte_data)); + if (tte_data & VTD_REF) vm_page_flag_set(m, PG_REFERENCED); - if (tpte & PG_M) { - KASSERT((tpte & PG_RW), - ("get_pv_entry: modified page not writable: va: %#x, pte: %#jx", - va, (uintmax_t)tpte)); - if (pmap_track_modified(va)) + if (tte_data & VTD_W) { + KASSERT((tte_data & VTD_SW_W), + ("get_pv_entry: modified page not writable: va: %lx, tte: %lx", + va, tte_data)); + if (pmap_track_modified(locked_pmap, va)) vm_page_dirty(m); } -#endif + pmap_invalidate_page(pmap, va); TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); if (TAILQ_EMPTY(&m->md.pv_list)) vm_page_flag_clear(m, PG_WRITEABLE); m->md.pv_list_count--; -#ifdef notyet - pmap_unuse_pt(pmap, va); -#endif if (pmap != locked_pmap) PMAP_UNLOCK(pmap); @@ -320,6 +323,7 @@ { pmap_t pmap, oldpmap; + critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); @@ -330,8 +334,7 @@ oldpmap->pm_active &= ~1; pmap->pm_active |= 1; #endif - - set_pdir_scratchpad(pmap->pm_pdir); + tte_hash_set_scratchpad(pmap->pm_hash); PCPU_SET(curpmap, pmap); critical_exit(); } @@ -456,8 +459,8 @@ #ifdef notyet /* XXX this tries to map at a wacky address */ for (i = 0; i < (MSGBUF_SIZE / PAGE_SIZE); i++) - tsb_set_tte(&kernel_td[TSB8K_INDEX], ((vm_offset_t)msgbufp) + i*PAGE_SIZE , msgbuf_phys + i*PAGE_SIZE, - TTE_KERNEL | VTD_8K, 0); + tsb_set_tte(&kernel_td[TSB8K_INDEX], ((vm_offset_t)msgbufp) + i*PAGE_SIZE , + msgbuf_phys + i*PAGE_SIZE | TTE_KERNEL | VTD_8K, 0); 
#endif /* @@ -480,7 +483,7 @@ for (i = 0; i < KSTACK_PAGES; i++) { pa = kstack0_phys + i * PAGE_SIZE; va = kstack0 + i * PAGE_SIZE; - tsb_set_tte(&kernel_td[TSB8K_INDEX], va , pa, TTE_KERNEL | VTD_8K, 0); + tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa | TTE_KERNEL | VTD_8K, 0); } /* @@ -521,14 +524,14 @@ if (translations[i].om_size == PAGE_SIZE_4M) { tsb_assert_invalid(&kernel_td[TSB4M_INDEX], translations[i].om_start); tsb_set_tte(&kernel_td[TSB4M_INDEX], translations[i].om_start, - TTE_GET_PA(translations[i].om_tte), TTE_KERNEL | VTD_4M, 0); + TTE_GET_PA(translations[i].om_tte) | TTE_KERNEL | VTD_4M, 0); } else { for (off = 0; off < translations[i].om_size; off += PAGE_SIZE) { va = translations[i].om_start + off; pa = TTE_GET_PA(translations[i].om_tte) + off; tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va); - tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, TTE_KERNEL | VTD_8K, 0); + tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa | TTE_KERNEL | VTD_8K, 0); } } } @@ -623,21 +626,25 @@ boolean_t wired) { vm_paddr_t pa, opa; - uint64_t tte_data, otte_data; + uint64_t tte_data, otte_data, *otte; vm_page_t om; int invlva; #if 0 printf("ctx=%d va=%lx prot=%x wired=%x\n", pmap->pm_context, va, prot, wired); #endif + KASSERT(pmap->pm_context != 0, + ("inserting faultable entries into context 0 without backing hash")); + + om = NULL; vm_page_lock_queues(); - om = NULL; PMAP_LOCK(pmap); sched_pin(); tte_data = pa = VM_PAGE_TO_PHYS(m); - otte_data = tsb_lookup_tte(va, pmap->pm_context); + otte = tte_hash_lookup(pmap->pm_hash, va); + otte_data = *otte; opa = TTE_GET_PA(otte_data); /* * Mapping has not changed, must be protection or wiring change. @@ -660,7 +667,7 @@ */ if (otte_data & VTD_MANAGED) { om = m; - pa |= VTD_MANAGED; + tte_data |= VTD_MANAGED; } goto validate; @@ -670,7 +677,7 @@ * handle validating new mapping. 
*/ if (opa) { - if (otte_data & VTD_W) + if (otte_data & VTD_WIRED) pmap->pm_stats.wired_count--; if (otte_data & VTD_MANAGED) { om = PHYS_TO_VM_PAGE(opa); @@ -697,19 +704,20 @@ * Now validate mapping with desired protection/wiring. */ if ((prot & VM_PROT_WRITE) != 0) - tte_data |= (VTD_W|VTD_WR_PERM); /* XXX need to handle modify */ + tte_data |= VTD_SW_W; if ((prot & VM_PROT_EXECUTE) != 0) tte_data |= VTD_X; if (wired) tte_data |= VTD_WIRED; if (pmap == kernel_pmap) tte_data |= TTE_KERNEL_MINFLAGS; - + else + tte_data |= TTE_MINFLAGS; + if ((otte_data & ~(VTD_W|VTD_REF)) != tte_data) { if (otte_data & VTD_V) { invlva = FALSE; - tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, tte_data, - pmap->pm_context); + *otte = tte_data; if (otte_data & VTD_REF) { if (otte_data & VTD_MANAGED) vm_page_flag_set(om, PG_REFERENCED); @@ -726,14 +734,14 @@ if (invlva) pmap_invalidate_page(pmap, va); } else - tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, tte_data, - pmap->pm_context); + tte_hash_insert(pmap->pm_hash, va, tte_data); } sched_unpin(); + PMAP_UNLOCK(pmap); vm_page_unlock_queues(); - PMAP_UNLOCK(pmap); + } @@ -741,7 +749,30 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte) { - UNIMPLEMENTED; + uint64_t pa; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); + PMAP_LOCK(pmap); + /* + * Enter on the PV list if part of our managed memory. Note that we + * raise IPL while manipulating pv_table since pmap_enter can be + * called at interrupt time. 
+ */ + if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) + pmap_insert_entry(pmap, va, m); + + pmap->pm_stats.resident_count++; + + pa = VM_PAGE_TO_PHYS(m); + + if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) + pa |= VTD_MANAGED; + + tte_hash_insert(pmap->pm_hash, va, pa | TTE_MINFLAGS); + + PMAP_UNLOCK(pmap); + return (0); } @@ -752,13 +783,15 @@ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { - vm_paddr_t rtval; - UNIMPLEMENTED; + vm_paddr_t pa; + tte_t *tte; PMAP_LOCK(pmap); - + + tte = tte_hash_lookup(pmap->pm_hash, va); + pa = TTE_GET_PA(*tte) | (va & TTE_GET_PAGE_MASK(*tte)); PMAP_UNLOCK(pmap); - return (rtval); + return (pa); } /* @@ -769,7 +802,24 @@ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { - UNIMPLEMENTED; + tte_t *tte_data; + vm_page_t m; + + m = NULL; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + + tte_data = tte_hash_lookup(pmap->pm_hash, va); + if (tte_data != 0 && + ((*tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) { + m = PHYS_TO_VM_PAGE(TTE_GET_PA(*tte_data)); + vm_page_hold(m); + } + + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); + + return (m); } void @@ -797,6 +847,8 @@ pv_entry_high_water = 9 * (pv_entry_max / 10); uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); + tte_hash_init(); + } /* @@ -838,8 +890,15 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t tva; +#if 0 printf("pmap_invalidate_range(sva=%lx, eva=%lx)\n", sva, eva); +#endif /* XXX SUN4V_FIXME - oversimplified logic */ + + if (pmap->pm_context != 0) { + tsb_clear_range(&pmap->pm_tsb, sva, eva); + } + if (((sva & PAGE_MASK_4M) != 0) || ((eva & PAGE_MASK_4M) != 0)) { for (tva = sva; tva < eva; tva += PAGE_SIZE_8K) invlpg(tva, pmap->pm_context); @@ -854,6 +913,10 @@ #ifdef SMP #error __FUNCTION_ not implemented #else + if (pmap->pm_context != 0) { + tsb_clear(&pmap->pm_tsb); + } + invlctx(pmap->pm_context); #endif } @@ -870,7 +933,7 @@ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t va) { 
- return (pmap->pm_pdir[va >> (PT_SHIFT + PAGE_SHIFT)] ? TRUE : FALSE); + return (TRUE); } /* @@ -879,7 +942,7 @@ void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { - tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa, TTE_KERNEL | VTD_8K, 0); + tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa | TTE_KERNEL | VTD_8K, 0); } /* @@ -909,10 +972,10 @@ { if ((va & PAGE_MASK_4M) == 0 && - tsb_get_tte(&kernel_td[TSB4M_INDEX], va, 0) != 0) - tsb_set_tte(&kernel_td[TSB4M_INDEX], va, 0, 0, 0); + tsb_get_tte(&kernel_td[TSB4M_INDEX], va) != 0) + tsb_clear_tte(&kernel_td[TSB4M_INDEX], va); else - tsb_set_tte(&kernel_td[TSB8K_INDEX], va, 0, 0, 0); + tsb_clear_tte(&kernel_td[TSB8K_INDEX], va); } static void @@ -1008,7 +1071,7 @@ { if ((prot & VM_PROT_WRITE) == 0) { if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { - tte_clear_phys_bit(m, VTD_WR_PERM|VTD_W); + tte_clear_phys_bit(m, VTD_SW_W | VTD_W); } else { pmap_remove_all(m); } @@ -1022,11 +1085,9 @@ pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); - /* - * The kernel does not use page tables - */ - pmap->pm_pdir = NULL; + pmap->pm_hash = NULL; pmap->pm_active = 0; + pmap->pm_context = 0; PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1039,36 +1100,14 @@ void pmap_pinit(pmap_t pmap) { - vm_page_t m, ptdpg[NPGPTD]; - static int color; - int i; - - PMAP_LOCK_INIT(pmap); - if (pmap->pm_pdir == NULL) - pmap->pm_pdir = (vm_offset_t *)kmem_alloc_nofault(kernel_map, NBPTD); + static int context = 1; /* XXX */ - /* - * allocate the page directory page(s) - */ - for (i = 0; i < NPGPTD;) { - m = vm_page_alloc(NULL, color++, - VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | - VM_ALLOC_ZERO); - if (m == NULL) - VM_WAIT; - else { - ptdpg[i++] = m; - } - } + pmap->pm_context = context++; - pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); + KASSERT(context < PMAP_CONTEXT_MAX, + ("max context limit hit - need to implement context recycling")); - for (i = 0; i < NPGPTD; i++) { - if 
((ptdpg[i]->flags & PG_ZERO) == 0) - bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); - } - - + pmap->pm_hash = tte_hash_create(pmap->pm_context); pmap->pm_active = 0; TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1081,12 +1120,13 @@ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { - UNIMPLEMENTED; -#ifdef notyet - vm_offset_t pdnxt; - pd_entry_t ptpaddr; - pt_entry_t *pte; + int anychanged; + uint64_t *tte; + + + KASSERT(pmap->pm_context != 0, + ("protection downgrades not handled correctly without backing hash")); if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); @@ -1099,28 +1139,43 @@ anychanged = 0; vm_page_lock_queues(); + PMAP_LOCK(pmap); sched_pin(); - PMAP_LOCK(pmap); - for (; sva < eva; sva = va_next) { - unsigned obits, pbits, pdirindex; - /* - * Weed out invalid mappings. Note: we assume that the page - * directory table is always allocated, and in kernel virtual. - */ - if (ptpaddr == 0) - continue; + for (; sva < eva; sva += PAGE_SIZE) { + uint64_t otte_data, tte_data; + vm_page_t m; + retry: + tte = tte_hash_lookup(pmap->pm_hash, sva); + otte_data = tte_data = tte ? 
*tte : 0; + if (tte_data & VTD_MANAGED) { + m = NULL; + if (tte_data & VTD_REF) { + m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data)); + vm_page_flag_set(m, PG_REFERENCED); + tte_data &= ~VTD_REF; + } + if ((tte_data & VTD_W) && pmap_track_modified(pmap, sva)) { + m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data)); + vm_page_dirty(m); + } + } + + tte_data &= ~(VTD_SW_W | VTD_W); - if (pdnxt > eva) - pdnxt = eva; + if (tte_data != otte_data) { + if (!atomic_cmpset_long(tte, otte_data, tte_data)) + goto retry; + anychanged = 1; + } } + sched_unpin(); - vm_page_unlock_queues(); if (anychanged) pmap_invalidate_all(pmap); + vm_page_unlock_queues(); PMAP_UNLOCK(pmap); -#endif } /* @@ -1169,29 +1224,12 @@ void pmap_release(pmap_t pmap) { - vm_page_t m, ptdpg[NPGPTD]; - int i; - KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); pmap_lazyfix(pmap); - - for (i = 0; i < NPGPTD; i++) - ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(((vm_offset_t)pmap->pm_pdir) + PAGE_SIZE)); - - pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); - - vm_page_lock_queues(); - for (i = 0; i < NPGPTD; i++) { - m = ptdpg[i]; - - m->wire_count--; - atomic_subtract_int(&cnt.v_wire_count, 1); - vm_page_free_zero(m); - } - vm_page_unlock_queues(); + tte_hash_destroy(pmap->pm_hash); PMAP_LOCK_DESTROY(pmap); } @@ -1201,93 +1239,33 @@ void pmap_remove(pmap_t pmap, vm_offset_t start, vm_offset_t end) { - UNIMPLEMENTED; -#ifdef notyet - vm_offset_t pdnxt; - pd_entry_t ptpaddr; - pt_entry_t *pte; - int anyvalid; - + int invlva, tinvlva; + vm_offset_t tva; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; - anyvalid = 0; - vm_page_lock_queues(); sched_pin(); PMAP_LOCK(pmap); - if (pmap == kernel_pmap) { - /* only need to clear page from the appropriate TSB */ - - } - - /* - * special handling of removing one page. a very - * common operation and easy to short circuit some - * code. 
- */ - if (sva + PAGE_SIZE == eva) { - pmap_remove_page(pmap, sva); - goto out; - } - - for (; sva < eva; sva = pdnxt) { - unsigned pdirindex; - - /* - * Calculate index for next page table. - */ - pdnxt = (sva + NBPDR) & ~PDRMASK; - if (pmap->pm_stats.resident_count == 0) - break; - - pdirindex = sva >> PDRSHIFT; - ptpaddr = pmap->pm_pdir[pdirindex]; - - /* - * Weed out invalid mappings. Note: we assume that the page - * directory table is always allocated, and in kernel virtual. - */ - if (ptpaddr == 0) - continue; - - /* - * Limit our scan to either the end of the va represented - * by the current page table page, or to the end of the - * range being removed. - */ - if (pdnxt > eva) - pdnxt = eva; - - /* XXX SUN4V_FIXME - * Have not yet decided whether or not to use mapped pages for - * page table pages - * In any event pmap_pte_quick needs to go - */ - for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, - sva += PAGE_SIZE) { - if (*pte == 0) - continue; - anyvalid = 1; - if (pmap_remove_pte(pmap, pte, sva)) - break; + if (pmap->pm_context != 0) { + invlva = 0; + for (tva = start; tva < end; tva += PAGE_SIZE) { + tinvlva = tte_hash_delete(pmap->pm_hash, tva); + invlva = tinvlva ? 
tinvlva : invlva; } - - - - - + } else { + tsb_clear_range(&pmap->pm_tsb, start, end); + invlva = 1; } -out: sched_unpin(); vm_page_unlock_queues(); - if (anyvalid) + if (invlva) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); -#endif + } /* @@ -1306,7 +1284,44 @@ void pmap_remove_all(vm_page_t m) { - UNIMPLEMENTED; + pv_entry_t pv; + uint64_t *tte, tte_data; + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + sched_pin(); + while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + PMAP_LOCK(pv->pv_pmap); + pv->pv_pmap->pm_stats.resident_count--; + KASSERT(pv->pv_pmap->pm_context != 0, + ("cannot special case absence of backing hash")); + tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va); + tte_data = *tte; + tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va); + if (tte_data & VTD_WIRED) + pv->pv_pmap->pm_stats.wired_count--; + if (tte_data & VTD_REF) + vm_page_flag_set(m, PG_REFERENCED); + + /* + * Update the vm_page_t clean and reference bits. + */ + if (tte_data & VTD_W) { + KASSERT((tte_data & VTD_SW_W), + ("pmap_remove_all: modified page not writable: va: %lx, tte: %lx", + pv->pv_va, tte_data)); + if (pmap_track_modified(pv->pv_pmap, pv->pv_va)) + vm_page_dirty(m); + } + + pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count--; + PMAP_UNLOCK(pv->pv_pmap); + free_pv_entry(pv); + } + vm_page_flag_clear(m, PG_WRITEABLE); + sched_unpin(); } static void @@ -1338,12 +1353,70 @@ void -pmap_remove_pages(pmap_t pmap, vm_offset_t start, vm_offset_t end) +pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - UNIMPLEMENTED; + + vm_page_t m; + pv_entry_t pv, npv; + tte_t *tte, tte_data; + + KASSERT(pmap->pm_context != 0, + ("cannot special case absence of backing hash")); + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + sched_pin(); + for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { + + if (pv->pv_va >= eva || pv->pv_va < sva) { + npv = 
TAILQ_NEXT(pv, pv_plist); + continue; + } + tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va); + tte_data = tte ? *tte : 0; + + if (tte_data == 0) { + printf("TTE at %p IS ZERO @ VA %016lx\n", + tte, pv->pv_va); + panic("bad tte"); + } + + /* + * We cannot remove wired pages from a + * process' mapping at this time + */ + + if (tte_data & VTD_WIRED) { + npv = TAILQ_NEXT(pv, pv_plist); >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200603040844.k248iPiv019247>