Date: Mon, 24 Apr 2006 02:07:20 GMT
From: Kip Macy <kmacy@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 95975 for review
Message-ID: <200604240207.k3O27KRp052728@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=95975

Change 95975 by kmacy@kmacy_storage:sun4v_rwbuf on 2006/04/24 02:07:00

	massive fixes for various issues including OFW not registering
	allocated pages, IPI acks etc.

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#36 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#36 (text+ko) ====

@@ -31,6 +31,7 @@
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/kdb.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/msgbuf.h>
@@ -68,6 +69,7 @@
 #include <machine/tte.h>
 #include <machine/tte_hash.h>
 #include <machine/pcb.h>
+#include <machine/pstate.h>
 #include <machine/tsb.h>

 #include <machine/hypervisor_api.h>
@@ -94,6 +96,7 @@
  * Map of physical memory regions.
  */
 vm_paddr_t phys_avail[128];
+vm_paddr_t phys_avail_tmp[128];
 static struct ofw_mem_region mra[128];
 static struct ofw_map translations[128];
 static int translations_size;
@@ -355,15 +358,17 @@
 	pmap_t pmap, oldpmap;

 	DPRINTF("activating pmap\n");
-	critical_enter();
+	spinlock_enter();
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 #if defined(SMP)
 	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
-	pmap->pm_active = PCPU_GET(cpumask);
+	pmap->pm_tlbactive = pmap->pm_active = PCPU_GET(cpumask);
+
 #else
 	oldpmap->pm_active &= ~1;
 	pmap->pm_active |= 1;
+	pmap->pm_tlbactive |= 1;
 #endif
 #if 0
 	tsb_clear(&pmap->pm_tsb);
@@ -377,7 +382,7 @@
 	hv_set_ctxnon0(1, pmap->pm_tsb_ra);
 	stxa(MMU_CID_S, ASI_MMU_CONTEXTID, pmap->pm_context);
 	membar(Sync);
-	critical_exit();
+	spinlock_exit();
 }

 vm_offset_t
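[The pmap_activate() hunk above starts tracking two masks per pmap: pm_active (CPUs currently running on the pmap) and pm_tlbactive (CPUs that may still hold stale TLB entries for it). A rough standalone model of that bookkeeping follows; the types and names are illustrative only, not the committed interface.]

    #include <stdint.h>

    typedef uint32_t cpumask_t;

    struct pmap_sketch {
            volatile cpumask_t pm_active;    /* CPUs currently using this pmap */
            volatile cpumask_t pm_tlbactive; /* CPUs that may hold stale TLB entries */
    };

    static void
    activate_sketch(struct pmap_sketch *oldpmap, struct pmap_sketch *newpmap,
        int curcpu)
    {
            cpumask_t mask = (cpumask_t)1 << curcpu;

            /* In the kernel these are atomic_clear_int()/atomic_set_int(). */
            oldpmap->pm_active &= ~mask;
            newpmap->pm_active |= mask;
            /* tlbactive stays set until a shootdown confirms the TLB is clean. */
            newpmap->pm_tlbactive |= mask;
    }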
@@ -399,7 +404,7 @@
 	ihandle_t pmem, vmem;
 	int i, sz, j;
 	uint64_t tsb_8k_size, tsb_4m_size, error;
-	
+
 	/*
 	 * Find out what physical memory is available from the prom and
 	 * initialize the phys_avail array. This must be done before
@@ -426,7 +431,9 @@
 	physmem = ctob(physmem);

 	for (i = 0, j = 0; i < sz; i++, j += 2) {
-		CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
+		CTR2(KTR_PMAP, "start=%#lx size=%#lx\n", mra[i].mr_start,
+		    mra[i].mr_size);
+		DPRINTF("start=%#lx size=%#lx\n", mra[i].mr_start,
 		    mra[i].mr_size);
 		if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
 			if (btoc(physsz) < physmem) {
@@ -443,6 +450,71 @@
 	}
 	physmem = btoc(physsz);

+	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
+		panic("pmap_bootstrap: finddevice /virtual-memory");
+	if ((sz = OF_getproplen(vmem, "translations")) == -1)
+		panic("pmap_bootstrap: getproplen translations");
+	if (sizeof(translations) < sz)
+		panic("pmap_bootstrap: translations too small");
+	bzero(translations, sz);
+	if (OF_getprop(vmem, "translations", translations, sz) == -1)
+		panic("pmap_bootstrap: getprop /virtual-memory/translations");
+	sz /= sizeof(*translations);
+	translations_size = sz;
+	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
+	qsort(translations, sz, sizeof (*translations), om_cmp);
+	for (i = 0; i < sz; i++) {
+		int j, k;
+		DPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
+		    translations[i].om_size, translations[i].om_start,
+		    translations[i].om_tte);
+		if (translations[i].om_size == PAGE_SIZE_4M &&
+		    (translations[i].om_start == KERNBASE ||
+		    translations[i].om_start == KERNBASE + PAGE_SIZE_4M)) {
+			DPRINTF("mapping permanent translation\n");
+			pa = TTE_GET_PA(translations[i].om_tte);
+			error = hv_mmu_map_perm_addr((char *)translations[i].om_start,
+			    KCONTEXT, pa | TTE_KERNEL | VTD_4M, MAP_ITLB | MAP_DTLB);
+			if (error != H_EOK)
+				panic("map_perm_addr returned error=%ld", error);
+
+			nucleus_mappings[permanent_mappings++] = pa;
+			nucleus_memory += PAGE_SIZE_4M;
+#ifdef SMP
+			mp_add_nucleus_mapping(translations[i].om_start,
+			    pa | TTE_KERNEL | VTD_4M);
+#endif
+			for (j = 0, k = 0; phys_avail[j + 2] != 0; j += 2, k += 2) {
+
+				if (pa == phys_avail[j]) {
+					phys_avail_tmp[k] = phys_avail[j] + PAGE_SIZE_4M;
+					phys_avail_tmp[k + 1] = phys_avail[j + 1];
+					break;
+				} else if (phys_avail[j] < pa && pa + PAGE_SIZE_4M == phys_avail[j + 1]) {
+					phys_avail_tmp[k] = phys_avail[j];
+					phys_avail_tmp[k + 1] = pa;
+				} else if (phys_avail[j] < pa && pa + PAGE_SIZE_4M < phys_avail[j + 1]) {
+					phys_avail_tmp[k] = phys_avail[j];
+					phys_avail_tmp[k + 1] = pa;
+					phys_avail_tmp[k + 2] = pa + PAGE_SIZE_4M;
+					phys_avail_tmp[k + 3] = phys_avail[j + 1];
+					k += 2;
+				} else {
+					phys_avail_tmp[k] = phys_avail[j];
+					phys_avail_tmp[k + 1] = phys_avail[j + 1];
+				}
+			}
+
+			for (j = 0; phys_avail_tmp[j + 2] != 0;
+			    phys_avail[j] = phys_avail_tmp[j],
+			    phys_avail[j + 1] = phys_avail_tmp[j + 1], j += 2)
+				;
+#if 0
+			for (i = 0; phys_avail_tmp[i + 1] != 0; i++)
+				phys_avail[i] = phys_avail_tmp[i];
+#endif
+		}
+	}
+
 	/*
 	 * Calculate the size of kernel virtual memory, and the size and mask
 	 * for the kernel tsb.
@@ -466,7 +538,7 @@
 	if (kernel_hash_pa & PAGE_MASK_4M)
 		panic("pmap_bootstrap: hashtable pa unaligned\n");
 	pmap_scrub_pages(kernel_hash_pa, PAGE_SIZE_4M);
-
+	printf("allocated hash\n");
 	/*
 	 * Set up TSB descriptors for the hypervisor
 	 *
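[The nested loop added above carves each permanently mapped 4MB nucleus page out of the phys_avail ranges -- the "OFW not registering allocated pages" fix mentioned in the change description. The same interval logic as a standalone sketch; the names carve_4m, avail and tmp are hypothetical, and unlike the kernel loop this version keeps scanning instead of breaking out on an exact front match.]

    #include <stdint.h>

    #define SZ_4M	(4UL * 1024 * 1024)

    /*
     * Carve the 4MB page at [pa, pa + 4MB) out of a zero-terminated
     * list of [start, end) physical ranges, rebuilding it in tmp.
     */
    static void
    carve_4m(uint64_t *avail, uint64_t *tmp, uint64_t pa)
    {
            int j, k;

            for (j = 0, k = 0; avail[j + 1] != 0; j += 2, k += 2) {
                    if (pa == avail[j]) {
                            /* Page at the front of the range: trim the start. */
                            tmp[k] = avail[j] + SZ_4M;
                            tmp[k + 1] = avail[j + 1];
                    } else if (avail[j] < pa && pa + SZ_4M == avail[j + 1]) {
                            /* Page at the back of the range: trim the end. */
                            tmp[k] = avail[j];
                            tmp[k + 1] = pa;
                    } else if (avail[j] < pa && pa + SZ_4M < avail[j + 1]) {
                            /* Page in the middle: split the range in two. */
                            tmp[k] = avail[j];
                            tmp[k + 1] = pa;
                            tmp[k + 2] = pa + SZ_4M;
                            tmp[k + 3] = avail[j + 1];
                            k += 2;
                    } else {
                            /* Range does not contain the page: copy through. */
                            tmp[k] = avail[j];
                            tmp[k + 1] = avail[j + 1];
                    }
            }
            /* Terminate the rebuilt list and copy it back. */
            tmp[k] = tmp[k + 1] = 0;
            for (j = 0; j <= k + 1; j++)
                    avail[j] = tmp[j];
    }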
@@ -556,26 +628,14 @@
 	/*
 	 * Calculate the last available physical address.
 	 */
-	for (i = 0; phys_avail[i + 2] != 0; i += 2)
-		;
+	for (i = 0; phys_avail[i + 2] != 0; i += 2)
+		DPRINTF("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
+		    i, phys_avail[i], i+1, phys_avail[i+1]);
 	Maxmem = sparc64_btop(phys_avail[i + 1]);

 	/*
 	 * Add the prom mappings to the kernel tsb.
 	 */
-	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
-		panic("pmap_bootstrap: finddevice /virtual-memory");
-	if ((sz = OF_getproplen(vmem, "translations")) == -1)
-		panic("pmap_bootstrap: getproplen translations");
-	if (sizeof(translations) < sz)
-		panic("pmap_bootstrap: translations too small");
-	bzero(translations, sz);
-	if (OF_getprop(vmem, "translations", translations, sz) == -1)
-		panic("pmap_bootstrap: getprop /virtual-memory/translations");
-	sz /= sizeof(*translations);
-	translations_size = sz;
-	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
-	qsort(translations, sz, sizeof (*translations), om_cmp);
 	for (i = 0; i < sz; i++) {
 		CTR3(KTR_PMAP, "translation: start=%#lx size=%#lx tte=%#lx",
@@ -584,37 +644,18 @@
 		DPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
 		    translations[i].om_size, translations[i].om_start,
 		    translations[i].om_tte);
-
-		if (translations[i].om_size == PAGE_SIZE_4M &&
-		    (translations[i].om_start == KERNBASE ||
-		    translations[i].om_start == KERNBASE + PAGE_SIZE_4M)) {
-			DPRINTF("mapping permanent translation\n");
-			pa = TTE_GET_PA(translations[i].om_tte);
-			error = hv_mmu_map_perm_addr((char *)translations[i].om_start,
-			    KCONTEXT, pa | TTE_KERNEL | VTD_4M, MAP_ITLB | MAP_DTLB);
-			nucleus_mappings[permanent_mappings++] = pa;
-			nucleus_memory += PAGE_SIZE_4M;
-
-#ifdef SMP
-			mp_add_nucleus_mapping(translations[i].om_start,
-			    pa | TTE_KERNEL | VTD_4M);
-#endif
-
-			if (error != H_EOK)
-				panic("map_perm_addr returned error=%ld", error);
+
+		if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
+		    translations[i].om_start > VM_MAX_PROM_ADDRESS)
 			continue;
-		} else if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
-		    translations[i].om_start > VM_MAX_PROM_ADDRESS) {
-			continue;
-		} else {
-			for (off = 0; off < translations[i].om_size;
-			    off += PAGE_SIZE) {
-				va = translations[i].om_start + off;
-				pa = TTE_GET_PA(translations[i].om_tte) + off;
-				tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va);
-				tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa |
-				    TTE_KERNEL | VTD_8K, 0);
-			}
+
+		for (off = 0; off < translations[i].om_size;
+		    off += PAGE_SIZE) {
+			va = translations[i].om_start + off;
+			pa = TTE_GET_PA(translations[i].om_tte) + off;
+			tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va);
+			tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa |
+			    TTE_KERNEL | VTD_8K, 0);
 		}
 	}

@@ -924,6 +965,7 @@
 		pmap_invalidate_page(pmap, va);
 	} else {
 		tte_hash_insert(pmap->pm_hash, va, tte_data);
+		membar(Sync);
 	}
 }

@@ -974,7 +1016,6 @@
 		tte_data |= VTD_MANAGED;

 	tte_hash_insert(pmap->pm_hash, va, tte_data | TTE_MINFLAGS);
-
 	PMAP_UNLOCK(pmap);

 	return (0);
@@ -1012,14 +1053,14 @@
 	m = NULL;
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
-
+	sched_pin();
 	tte_data = tte_hash_lookup(pmap->pm_hash, va);
 	if (tte_data != 0 &&
 	    ((*tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) {
 		m = PHYS_TO_VM_PAGE(TTE_GET_PA(*tte_data));
 		vm_page_hold(m);
 	}
-
+	sched_unpin();
 	vm_page_unlock_queues();
 	PMAP_UNLOCK(pmap);
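[The membar(Sync) added after tte_hash_insert() above makes the new TTE globally visible before another CPU, or the MMU's hash walk, can observe it. A minimal model of that publish ordering, using C11 atomics as a stand-in for the SPARC barrier; the names are illustrative.]

    #include <stdatomic.h>
    #include <stdint.h>

    static void
    publish_tte(_Atomic uint64_t *slot, uint64_t tte_data)
    {
            /*
             * Release ordering plays the role of membar(Sync) here: all
             * prior stores (e.g. initializing the backing page) complete
             * before the TTE becomes visible to other CPUs.
             */
            atomic_store_explicit(slot, tte_data, memory_order_release);
    }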
@@ -1081,13 +1122,21 @@
 pmap_ipi(pmap_t pmap, char *func, uint64_t arg1, uint64_t arg2)
 {
-	int active;
+	int i, active, cpu_count;
+	u_int cpus;
 	cpumask_t cpumask;
+	uint16_t *cpulist;
+	uint32_t ackmask, ackexpect;
+	int inext;

 	if (!smp_started)
 		return;
-
+	
 	cpumask = PCPU_GET(cpumask);
+	cpulist = PCPU_GET(cpulist);
+
+	if (rdpr(pil) != 14)
+		panic("pil %ld != 14", rdpr(pil));

 #ifndef CPUMASK_NOT_BEING_ERRONEOUSLY_CHANGED
 	/* by definition cpumask should have curcpu's bit set */
@@ -1096,6 +1145,14 @@
 		    cpumask, (1 << curcpu));

 	/* tlbactive should be set if we're using a pmap */
+	if ((cpumask & pmap->pm_active) == 0 && curthread->td_proc &&
+	    curthread->td_proc->p_pid != 1) {
+		if (curthread->td_proc)
+			printf("%s not active\n",
+			    curthread->td_proc->p_comm);
+		printf("td->pcb->pcb_kstack=0x%lx\n", curthread->td_pcb->pcb_kstack);
+		panic("cpumask(0x%x) & active (0x%x) == 0 pid == %d\n",
+		    cpumask, pmap->pm_active, curthread->td_proc->p_pid);
+	}
 	if ((cpumask & pmap->pm_tlbactive) == 0)
 		panic("cpumask(0x%x) & tlbactive (0x%x) == 0\n",
 		    cpumask, pmap->pm_tlbactive);
@@ -1103,73 +1160,137 @@

 	if (cpumask == pmap->pm_tlbactive)
 		return;
-
+#if 0
 	if (pmap != kernel_pmap)
-		active = pmap->pm_tlbactive & ~cpumask;
+		active = (pmap->pm_tlbactive & ~cpumask);
 	else
+#endif
 		active = PCPU_GET(other_cpus);
+
+#if 1
-	cpu_ipi_selected(active, (uint64_t)func, (uint64_t)arg1, (uint64_t)arg2);
+	for (cpu_count = 0, i = 0, ackexpect = 0, cpus = active; i < 32 && cpus;) {
+
+		if (!(cpus & 0x1) /*|| (i & ~0x3) == (curcpu & ~0x3) */ ) {
+			cpus = cpus >> 1;
+			i++;
+			continue;
+		}
+
+		cpulist[cpu_count] = (uint16_t)i;
+		cpu_count++;
+		ackexpect |= (1 << i);
+#if 0
+		inext = ((i & ~0x3) + 4);
+#else
+		inext = i++;
+#endif
+		cpus = (cpus >> (inext - i));
+		i = inext;
+	}
+#else
+	inext = i = cpus = 0;
+	cpulist[0] = curcpu ? 0 : 1;
+	cpu_count = 1;
+	ackexpect = curcpu ? 1 : 2;
+#endif
+
+	if (cpu_count == 0)
+		return;
+
+	ackmask = 0;
+	cpu_ipi_selected(cpu_count, cpulist, (uint64_t)func, (uint64_t)arg1,
+	    (uint64_t)arg2, (uint64_t *)&ackmask);
+
+	while (ackmask != ackexpect) {
+		DELAY(1);
+		i++;
+		if (i > 1000000)
+			panic(" ackmask=0x%x active=0x%x\n", ackmask, ackexpect);
+	}
 }
 #endif

 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
+	spinlock_enter();
 	tsb_clear_tte(&pmap->pm_tsb, va);

 	DPRINTF("pmap_invalidate_page(va=0x%lx)\n", va);
 	invlpg(va, pmap->pm_context);
 #ifdef SMP
+	pmap_ipi(pmap, (void *)tl_invlpg, (uint64_t)va, (uint64_t)pmap->pm_context);
 #endif
+	spinlock_exit();
+
+
 }

 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t tva;
+#ifdef SMP
 	char *func;
+#endif
+	spinlock_enter();
+
 	if (pmap != kernel_pmap)
 		DPRINTF("pmap_invalidate_range(sva=%lx, eva=%lx)\n", sva, eva);

 	if ((((eva - sva) >> PAGE_SHIFT) < MAX_TSB_CLEARS) ||
 	    (pmap->pm_context == 0)) {
 		tsb_clear_range(&pmap->pm_tsb, sva, eva);
-		func = tl_invltlb;
 	} else {
 		tsb_clear(&pmap->pm_tsb);
-		func = tl_invlctx;
 	}
 	/* XXX */
 	invltlb();
+
 	if ((((eva - sva) >> PAGE_SHIFT) < MAX_INVALIDATES)) {
 		for (tva = sva; tva < eva; tva += PAGE_SIZE_8K)
 			invlpg(tva, pmap->pm_context);
-	} else if (pmap->pm_context) {
+	} else if (pmap->pm_context)
 		invlctx(pmap->pm_context);
-	} else
+	else
 		invltlb();
+
 #ifdef SMP
-	pmap_ipi(pmap, (void *)func, 0, 0);
-	pmap->pm_tlbactive = pmap->pm_active;
+	if (pmap == kernel_pmap)
+		func = tl_invltlb;
+	else
+		func = tl_invlctx;
+
+
+	pmap_ipi(pmap, (void *)func, pmap->pm_context, 0);
+	if (pmap != kernel_pmap)
+		pmap->pm_tlbactive = pmap->pm_active;
 #endif
-
+	spinlock_exit();
 }

 void
 pmap_invalidate_all(pmap_t pmap)
 {
 	char *func;
+
+	spinlock_enter();
+
+	if (pmap == kernel_pmap)
+		panic("invalidate_all called on kernel_pmap");
+
 	tsb_clear(&pmap->pm_tsb);
-
 	if (pmap->pm_context) {
 		invlctx(pmap->pm_context);
 		func = tl_invlctx;
@@ -1180,9 +1301,11 @@

 #ifdef SMP
 	pmap_ipi(pmap, func, pmap->pm_context, 0);
-	pmap->pm_tlbactive = pmap->pm_active;
+	if (pmap != kernel_pmap)
+		pmap->pm_tlbactive = pmap->pm_active;
 #endif
+	spinlock_exit();
 }

 boolean_t
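[The reworked pmap_ipi() above is the "IPI acks" fix from the change description: the sender builds a list of target CPU ids plus the ack mask it expects, each target sets its bit in a shared mask from its IPI handler, and the sender spins until the masks match, panicking after roughly a second. The six-argument cpu_ipi_selected() signature is taken from the diff; everything else below (build_ipi_targets, ack_ipi, wait_for_acks) is a hypothetical standalone model using C11 atomics.]

    #include <stdatomic.h>
    #include <stdint.h>

    /* Sender side: collect targets and the ack bits to expect. */
    static int
    build_ipi_targets(uint32_t active, uint16_t *cpulist, uint32_t *ackexpect)
    {
            int i, cpu_count = 0;

            *ackexpect = 0;
            for (i = 0; i < 32; i++) {
                    if ((active & (1u << i)) == 0)
                            continue;
                    cpulist[cpu_count++] = (uint16_t)i;
                    *ackexpect |= (1u << i);
            }
            return (cpu_count);
    }

    /* Target side: called from each interrupted CPU's IPI handler. */
    static void
    ack_ipi(_Atomic uint32_t *ackmask, int curcpu)
    {
            atomic_fetch_or_explicit(ackmask, 1u << curcpu,
                memory_order_release);
    }

    /* Sender side: spin until every target has acknowledged. */
    static void
    wait_for_acks(_Atomic uint32_t *ackmask, uint32_t ackexpect)
    {
            while (atomic_load_explicit(ackmask, memory_order_acquire) !=
                ackexpect)
                    ;       /* the kernel DELAY()s and panics on timeout */
    }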
@@ -1203,7 +1326,7 @@
 /*
  * Map a wired page into kernel virtual address space.
  */
-void
+static void
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
 	tte_hash_insert(kernel_pmap->pm_hash, va, pa | TTE_KERNEL | VTD_8K);
@@ -1235,7 +1358,7 @@
 	return pa;
 }

-void
+static void
 pmap_kremove(vm_offset_t va)
 {
 	tte_hash_delete(kernel_pmap->pm_hash, va);
@@ -1352,7 +1475,8 @@
 pmap_pinit0(pmap_t pmap)
 {
 	PMAP_LOCK_INIT(pmap);
-	pmap->pm_active = 0;
+	pmap->pm_active = ~0;
+	pmap->pm_tlbactive = ~0;
 	pmap->pm_context = 0;
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvlist);
@@ -1375,6 +1499,7 @@
 	pmap->pm_hash = tte_hash_create(pmap->pm_context, &pmap->pm_hashscratch);
 	pmap->pm_tsb_ra = tsb_init(&pmap->pm_tsb, &pmap->pm_tsbscratch);
 	pmap->pm_active = 0;
+	pmap->pm_tlbactive = 0;
 	TAILQ_INIT(&pmap->pm_pvlist);
 	PMAP_LOCK_INIT(pmap);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1520,7 +1645,6 @@
 	sched_pin();
 	PMAP_LOCK(pmap);
 	for (tva = start; tva < end; tva += PAGE_SIZE) {
-
 		if ((tte = tte_hash_lookup(pmap->pm_hash, tva)) == NULL)
 			continue;
 		pmap_remove_tte(pmap, tte, tva);
@@ -1530,8 +1654,12 @@
 	}
 	sched_unpin();
 	vm_page_unlock_queues();
-	if (invlva)
-		pmap_invalidate_all(pmap);
+	if (invlva) {
+		if (pmap == kernel_pmap)
+			pmap_invalidate_range(pmap, start, end);
+		else
+			pmap_invalidate_all(pmap);
+	}
 	PMAP_UNLOCK(pmap);
 }

@@ -1621,7 +1749,7 @@

 void
-pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_remove_pages(pmap_t pmap)
 {
 	vm_page_t m;
@@ -1632,12 +1760,6 @@
 	PMAP_LOCK(pmap);
 	sched_pin();
 	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
-#if 0
-		if (pv->pv_va >= eva || pv->pv_va < sva) {
-			npv = TAILQ_NEXT(pv, pv_plist);
-			continue;
-		}
-#endif
 		tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va);
 		tte_data = tte ? *tte : 0;
@@ -1658,7 +1780,6 @@
 			m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));

 		pmap->pm_stats.resident_count--;
-
 		tte_hash_delete(pmap->pm_hash, pv->pv_va);

 		if (tte_data & VTD_W) {
 			vm_page_dirty(m);
@@ -1675,6 +1796,8 @@
 		free_pv_entry(pv);
 	}

+	tte_hash_delete_all(pmap->pm_hash);
+
 	sched_unpin();
 	pmap_invalidate_all(pmap);
 	PMAP_UNLOCK(pmap);
@@ -1744,12 +1867,7 @@
 void
 pmap_zero_page(vm_page_t m)
 {
-	uint64_t bytes_zeroed, error;
-
-	error = hv_mem_scrub(VM_PAGE_TO_PHYS(m), PAGE_SIZE, &bytes_zeroed);
-	if (error || bytes_zeroed != PAGE_SIZE)
-		panic("hv_mem_scrub failed error: %ld - bytes_zeroed: %ld\n",
-		    error, bytes_zeroed);
+	bzero((char *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
 }

 void
@@ -1768,12 +1886,21 @@
 void
 pmap_zero_page_idle(vm_page_t m)
 {
-	uint64_t bytes_zeroed, error;
+	int i;

-	error = hv_mem_scrub(VM_PAGE_TO_PHYS(m), PAGE_SIZE, &bytes_zeroed);
-	if (error || bytes_zeroed != PAGE_SIZE)
-		panic("hv_mem_scrub failed error: %ld - bytes_zeroed: %ld\n",
-		    error, bytes_zeroed);
+	for (i = 0; i < (nucleus_memory >> PAGE_SHIFT_4M); i++) {
+		if (VM_PAGE_TO_PHYS(m) >= nucleus_mappings[i] &&
+		    VM_PAGE_TO_PHYS(m) < nucleus_mappings[i] + PAGE_SIZE_4M)
+			panic("zeroing nucleus");
+		if (pmap_kextract(TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m))) >=
+		    nucleus_mappings[i] &&
+		    pmap_kextract(TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m))) <
+		    nucleus_mappings[i] + PAGE_SIZE_4M)
+			panic("zeroing nucleus in direct area");
+	}
+
+
+	bzero((char *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
 }
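[pmap_zero_page() above switches from the hv_mem_scrub() hypervisor call to zeroing through the kernel's direct physical mapping. A sketch of the idea follows; DIRECT_BASE, phys_to_direct() and PAGE_SIZE_SKETCH are hypothetical stand-ins for the kernel's TLB_PHYS_TO_DIRECT() and PAGE_SIZE, not the committed interface.]

    #include <stdint.h>
    #include <string.h>

    #define DIRECT_BASE		0xfffff80000000000UL	/* assumed base VA */
    #define PAGE_SIZE_SKETCH	8192

    /* Translate a physical address to its direct-mapped virtual address. */
    static inline void *
    phys_to_direct(uint64_t pa)
    {
            return ((void *)(uintptr_t)(DIRECT_BASE + pa));
    }

    /* Zero a page via the direct map instead of a hypervisor scrub call. */
    static void
    zero_page_sketch(uint64_t pa)
    {
            memset(phys_to_direct(pa), 0, PAGE_SIZE_SKETCH);
    }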