Date: Sun, 2 Jul 2006 20:58:45 GMT From: Kip Macy <kmacy@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 100449 for review Message-ID: <200607022058.k62Kwjvl082737@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=100449 Change 100449 by kmacy@kmacy_storage:sun4v_work_stable on 2006/07/02 20:58:32 Work around an apparent bug in the hypervisor that prevents multiple page sizes in a given TSB by staying with 4MB pages for the direct-mapped area and enforcing 4MB alignment in phys_avail base bounds. In some cases nucleus memory was not being removed from phys_avail, particularly if there is only one phys_avail range - this led the kernel to believe that the nucleus memory was available for general use and thus zero it. Affected files ... .. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/pmap.c#6 edit Differences ... ==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/pmap.c#6 (text+ko) ==== @@ -424,59 +424,14 @@ { struct pmap *pm; vm_offset_t off, va; - vm_paddr_t pa, kernel_hash_pa; + vm_paddr_t pa, kernel_hash_pa, phys_avail_start, nucleus_memory_start; vm_size_t physsz, virtsz, kernel_hash_size; ihandle_t pmem, vmem; int i, sz, j; uint64_t tsb_8k_size, tsb_4m_size, error, physmem_tunable; - /* - * Find out what physical memory is available from the prom and - * initialize the phys_avail array. This must be done before - * pmap_bootstrap_alloc is called. 
- */ - if ((pmem = OF_finddevice("/memory")) == -1) - panic("pmap_bootstrap: finddevice /memory"); - if ((sz = OF_getproplen(pmem, "available")) == -1) - panic("pmap_bootstrap: getproplen /memory/available"); - if (sizeof(vm_paddr_t)*128 < sz) /* FIXME */ - panic("pmap_bootstrap: phys_avail too small"); - if (sizeof(mra) < sz) - panic("pmap_bootstrap: mra too small"); - bzero(mra, sz); - if (OF_getprop(pmem, "available", mra, sz) == -1) - panic("pmap_bootstrap: getprop /memory/available"); - - sz /= sizeof(*mra); - CTR0(KTR_PMAP, "pmap_bootstrap: physical memory"); - qsort(mra, sz, sizeof (*mra), mr_cmp); - physsz = 0; - if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) - physmem = atop(physmem_tunable); - - printf("desired physmem=0x%lx\n", physmem_tunable); - for (i = 0, j = 0; i < sz; i++, j += 2) { - CTR2(KTR_PMAP, "start=%#lx size=%#lx\n", mra[i].mr_start, - mra[i].mr_size); - KDPRINTF("start=%#lx size=%#lx\n", mra[i].mr_start, - mra[i].mr_size); - if (physmem_tunable != 0 && (physsz + mra[i].mr_size) >= physmem_tunable) { - if (physsz < physmem_tunable) { - phys_avail[j] = mra[i].mr_start; - phys_avail[j + 1] = mra[i].mr_start + - (physmem_tunable - physsz); - physsz = physmem_tunable; - } - break; - } - phys_avail[j] = mra[i].mr_start; - phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size; - physsz += mra[i].mr_size; - } - physmem = btoc(physsz); - if ((vmem = OF_finddevice("/virtual-memory")) == -1) panic("pmap_bootstrap: finddevice /virtual-memory"); if ((sz = OF_getproplen(vmem, "translations")) == -1) @@ -488,10 +443,10 @@ panic("pmap_bootstrap: getprop /virtual-memory/translations"); sz /= sizeof(*translations); translations_size = sz; + nucleus_memory_start = 0; CTR0(KTR_PMAP, "pmap_bootstrap: translations"); qsort(translations, sz, sizeof (*translations), om_cmp); for (i = 0; i < sz; i++) { - int j, k; KDPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n", translations[i].om_size, translations[i].om_start, translations[i].om_tte); @@ -504,40 
+459,102 @@ KCONTEXT, pa | TTE_KERNEL | VTD_4M, MAP_ITLB | MAP_DTLB); if (error != H_EOK) panic("map_perm_addr returned error=%ld", error); - + + if ((nucleus_memory_start == 0) || (pa < nucleus_memory_start)) + nucleus_memory_start = pa; nucleus_mappings[permanent_mappings++] = pa; nucleus_memory += PAGE_SIZE_4M; #ifdef SMP mp_add_nucleus_mapping(translations[i].om_start, pa | TTE_KERNEL | VTD_4M); #endif - for (j = 0, k = 0; phys_avail[j + 2] != 0; j += 2, k += 2) { + } + } + + /* + * Find out what physical memory is available from the prom and + * initialize the phys_avail array. This must be done before + * pmap_bootstrap_alloc is called. + */ + if ((pmem = OF_finddevice("/memory")) == -1) + panic("pmap_bootstrap: finddevice /memory"); + if ((sz = OF_getproplen(pmem, "available")) == -1) + panic("pmap_bootstrap: getproplen /memory/available"); + if (sizeof(vm_paddr_t)*128 < sz) /* FIXME */ + panic("pmap_bootstrap: phys_avail too small"); + if (sizeof(mra) < sz) + panic("pmap_bootstrap: mra too small"); + bzero(mra, sz); + if (OF_getprop(pmem, "available", mra, sz) == -1) + panic("pmap_bootstrap: getprop /memory/available"); + + sz /= sizeof(*mra); + CTR0(KTR_PMAP, "pmap_bootstrap: physical memory"); + + qsort(mra, sz, sizeof (*mra), mr_cmp); + physsz = 0; + + if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) + physmem = atop(physmem_tunable); + + KDPRINTF("desired physmem=0x%lx\n", physmem_tunable); + for (i = 0, j = 0; i < sz; i++) { + vm_paddr_t start = mra[i].mr_start; + uint64_t size = mra[i].mr_size; + CTR2(KTR_PMAP, "start=%#lx size=%#lx\n", mra[i].mr_start, mra[i].mr_size); + KDPRINTF("start=%#lx size=%#lx\n", mra[i].mr_start, mra[i].mr_size); + if (nucleus_memory_start == mra[i].mr_start) { + mra[i].mr_start += 2*PAGE_SIZE_4M; + mra[i].mr_size -= 2*PAGE_SIZE_4M; + } + if (nucleus_memory_start == (start + size - 2*PAGE_SIZE_4M)) + mra[i].mr_size -= 2*PAGE_SIZE_4M; - if (pa == phys_avail[j]) { - phys_avail_tmp[k] = phys_avail[j] + PAGE_SIZE_4M; - 
phys_avail_tmp[k + 1] = phys_avail[j + 1]; - break; - } else if (phys_avail[j] < pa && pa + PAGE_SIZE_4M == phys_avail[j + 1]) { - phys_avail_tmp[k] = phys_avail[j]; - phys_avail_tmp[k + 1] = pa; - } else if (phys_avail[j] < pa && pa + PAGE_SIZE_4M < phys_avail[j + 1]) { - phys_avail_tmp[k] = phys_avail[j]; - phys_avail_tmp[k + 1] = pa; - phys_avail_tmp[k + 2] = pa + PAGE_SIZE_4M; - phys_avail_tmp[k + 3] = phys_avail[j + 1]; - k += 2; - } else { - phys_avail_tmp[k] = phys_avail[j]; - phys_avail_tmp[k + 1] = phys_avail[j + 1]; - } + if ((nucleus_memory_start > start) && (nucleus_memory_start < (start + size))) { + uint64_t firstsize = (nucleus_memory_start - start); + phys_avail[j] = start; + if ((physmem_tunable != 0) && ((physsz + firstsize) > physmem_tunable)) { + phys_avail[j+1] = start + (physmem_tunable - physsz); + physsz = physmem_tunable; + break; } - - for (j = 0; phys_avail_tmp[j + 2] != 0; phys_avail[j] = phys_avail_tmp[j], - phys_avail[j + 1] = phys_avail_tmp[j + 1], j += 2) - ; - } + phys_avail[j+1] = nucleus_memory_start; + size = size - firstsize - 2*PAGE_SIZE_4M; + mra[i].mr_start = nucleus_memory_start + 2*PAGE_SIZE_4M; + mra[i].mr_size = size; + physsz += firstsize + 2*PAGE_SIZE_4M; + j += 2; + } + if (mra[i].mr_size < PAGE_SIZE_4M) + continue; + if ((mra[i].mr_start & PAGE_MASK_4M) && (mra[i].mr_size < 2*PAGE_SIZE_4M)) + continue; + if (mra[i].mr_start & PAGE_MASK_4M) { + uint64_t newstart, roundup; + newstart = ((mra[i].mr_start + (PAGE_SIZE_4M-1)) & ~PAGE_MASK_4M); + roundup = newstart - mra[i].mr_start; + mra[i].mr_size -= roundup; + mra[i].mr_start = newstart; + } + mra[i].mr_size &= ~PAGE_MASK_4M; + phys_avail[j] = mra[i].mr_start; + if (physmem_tunable != 0 && ((physsz + mra[i].mr_size) >= physmem_tunable)) { + size = physmem_tunable - physsz; + phys_avail[j + 1] = mra[i].mr_start + size; + physsz = physmem_tunable; + break; + } + phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size; + physsz += mra[i].mr_size; + j += 2; } + 
phys_avail_start = phys_avail[0]; + physmem = btoc(physsz); + + for (i = 0; phys_avail[i] != 0; i += 2) + KDPRINTF("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n", + i, phys_avail[i], i+1, phys_avail[i+1]); /* * Calculate the size of kernel virtual memory, and the size and mask * for the kernel tsb. @@ -609,7 +626,7 @@ * currently (not by design) used for permanent mappings */ - tsb_4m_size = (virtsz >> (PAGE_SHIFT_4M - TTE_SHIFT)); + tsb_4m_size = (virtsz >> (PAGE_SHIFT_4M - TTE_SHIFT)) << 3; pa = pmap_bootstrap_alloc(tsb_4m_size); KDPRINTF("tsb_4m_pa is 0x%lx tsb_4m_size is 0x%lx\n", pa, tsb_4m_size); @@ -684,22 +701,35 @@ } } + error = hv_set_ctx0(MAX_TSB_INFO, vtophys((vm_offset_t)&kernel_td)); + if (error != H_EOK) + panic("failed to set ctx0 TSBs error: %ld", error); +#ifdef SMP + mp_set_tsb_desc_ra(vtophys((vm_offset_t)&kernel_td)); +#endif /* * setup direct mappings * */ - pa = PAGE_SIZE_4M; - for (i = 0; phys_avail[i] != 0; i += 2) - for (; pa < phys_avail[i + 1]; pa += PAGE_SIZE_4M) + i = 0; + pa = phys_avail_start; + do { + for (; pa < phys_avail[i + 1]; pa += PAGE_SIZE_4M) { + tsb_assert_invalid(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa)); tsb_set_tte_real(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa), pa | TTE_KERNEL | VTD_4M, 0); - - for (i = 0; i < 2; i++) { + + } + i += 2; + pa = phys_avail[i]; + } while (pa != 0); + + for (i = 0; i < 2; i++) { pa = nucleus_mappings[i]; + tsb_assert_invalid(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa)); tsb_set_tte_real(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa), pa | TTE_KERNEL | VTD_4M, 0); - } /* * Get the available physical memory ranges from /memory/reg. 
These @@ -722,14 +752,6 @@ TAILQ_INIT(&kernel_pmap->pm_pvlist); - error = hv_set_ctx0(MAX_TSB_INFO, vtophys((vm_offset_t)&kernel_td)); - if (error != H_EOK) - panic("failed to set ctx0 TSBs error: %ld", error); - -#ifdef SMP - mp_set_tsb_desc_ra(vtophys((vm_offset_t)&kernel_td)); -#endif - /* * This could happen earlier - but I put it here to avoid * attempts to do updates until they're legal
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200607022058.k62Kwjvl082737>