Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 2 Jul 2006 20:58:45 GMT
From:      Kip Macy <kmacy@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 100449 for review
Message-ID:  <200607022058.k62Kwjvl082737@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=100449

Change 100449 by kmacy@kmacy_storage:sun4v_work_stable on 2006/07/02 20:58:32

	Work around an apparent bug in the hypervisor that prevents multiple page sizes
	in a given TSB by staying with 4MB pages for the direct-mapped area and enforcing
	4MB alignment in the phys_avail base bounds.
	In some cases nucleus memory was not being removed from phys_avail, particularly if
	there is only one phys_avail range - this led the kernel to believe that the nucleus
	memory was available for general use and thus zeroed it.

Affected files ...

.. //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/pmap.c#6 edit

Differences ...

==== //depot/projects/kmacy_sun4v_stable/src/sys/sun4v/sun4v/pmap.c#6 (text+ko) ====

@@ -424,59 +424,14 @@
 {
 	struct pmap *pm;
 	vm_offset_t off, va;
-	vm_paddr_t pa, kernel_hash_pa;
+	vm_paddr_t pa, kernel_hash_pa, phys_avail_start, nucleus_memory_start;
 	vm_size_t physsz, virtsz, kernel_hash_size;
 	ihandle_t pmem, vmem;
 	int i, sz, j;
 	uint64_t tsb_8k_size, tsb_4m_size, error, physmem_tunable;
 
-	/*
-	 * Find out what physical memory is available from the prom and
-	 * initialize the phys_avail array.  This must be done before
-	 * pmap_bootstrap_alloc is called.
-	 */
-	if ((pmem = OF_finddevice("/memory")) == -1)
-		panic("pmap_bootstrap: finddevice /memory");
-	if ((sz = OF_getproplen(pmem, "available")) == -1)
-		panic("pmap_bootstrap: getproplen /memory/available");
-	if (sizeof(vm_paddr_t)*128 < sz) /* FIXME */
-		panic("pmap_bootstrap: phys_avail too small");
-	if (sizeof(mra) < sz)
-		panic("pmap_bootstrap: mra too small");
-	bzero(mra, sz);
-	if (OF_getprop(pmem, "available", mra, sz) == -1)
-		panic("pmap_bootstrap: getprop /memory/available");
-
-	sz /= sizeof(*mra);
-	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
 
-	qsort(mra, sz, sizeof (*mra), mr_cmp);
-	physsz = 0;
 
-        if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
-                physmem = atop(physmem_tunable);
-
-	printf("desired physmem=0x%lx\n", physmem_tunable);
-	for (i = 0, j = 0; i < sz; i++, j += 2) {
-		CTR2(KTR_PMAP, "start=%#lx size=%#lx\n", mra[i].mr_start,
-		    mra[i].mr_size);
-		KDPRINTF("start=%#lx size=%#lx\n", mra[i].mr_start,
-		    mra[i].mr_size);
-		if (physmem_tunable != 0 && (physsz + mra[i].mr_size) >= physmem_tunable) {
-			if (physsz < physmem_tunable) {
-				phys_avail[j] = mra[i].mr_start;
-				phys_avail[j + 1] = mra[i].mr_start +
-				    (physmem_tunable - physsz);
-				physsz = physmem_tunable;
-			}
-			break;
-		}
-		phys_avail[j] = mra[i].mr_start;
-		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
-		physsz += mra[i].mr_size;
-	}
-	physmem = btoc(physsz);
-	
 	if ((vmem = OF_finddevice("/virtual-memory")) == -1)
 		panic("pmap_bootstrap: finddevice /virtual-memory");
 	if ((sz = OF_getproplen(vmem, "translations")) == -1)
@@ -488,10 +443,10 @@
 		panic("pmap_bootstrap: getprop /virtual-memory/translations");
 	sz /= sizeof(*translations);
 	translations_size = sz;
+	nucleus_memory_start = 0;
 	CTR0(KTR_PMAP, "pmap_bootstrap: translations");
 	qsort(translations, sz, sizeof (*translations), om_cmp);
 	for (i = 0; i < sz; i++) {
-		int j, k;
 		KDPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n", 
 			translations[i].om_size, translations[i].om_start, 
 			translations[i].om_tte);
@@ -504,40 +459,102 @@
 				KCONTEXT, pa | TTE_KERNEL | VTD_4M, MAP_ITLB | MAP_DTLB);
 			if (error != H_EOK)
 				panic("map_perm_addr returned error=%ld", error);
-
+			
+			if ((nucleus_memory_start == 0) || (pa < nucleus_memory_start))
+				nucleus_memory_start = pa;
 			nucleus_mappings[permanent_mappings++] = pa;
 			nucleus_memory += PAGE_SIZE_4M;
 #ifdef SMP
 			mp_add_nucleus_mapping(translations[i].om_start, 
 					       pa | TTE_KERNEL | VTD_4M);
 #endif
-			for (j = 0, k = 0; phys_avail[j + 2] != 0; j += 2, k += 2) {
+		}  
+	}
+
+	/*
+	 * Find out what physical memory is available from the prom and
+	 * initialize the phys_avail array.  This must be done before
+	 * pmap_bootstrap_alloc is called.
+	 */
+	if ((pmem = OF_finddevice("/memory")) == -1)
+		panic("pmap_bootstrap: finddevice /memory");
+	if ((sz = OF_getproplen(pmem, "available")) == -1)
+		panic("pmap_bootstrap: getproplen /memory/available");
+	if (sizeof(vm_paddr_t)*128 < sz) /* FIXME */
+		panic("pmap_bootstrap: phys_avail too small");
+	if (sizeof(mra) < sz)
+		panic("pmap_bootstrap: mra too small");
+	bzero(mra, sz);
+	if (OF_getprop(pmem, "available", mra, sz) == -1)
+		panic("pmap_bootstrap: getprop /memory/available");
+
+	sz /= sizeof(*mra);
+	CTR0(KTR_PMAP, "pmap_bootstrap: physical memory");
+
+	qsort(mra, sz, sizeof (*mra), mr_cmp);
+	physsz = 0;
+
+        if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable))
+                physmem = atop(physmem_tunable);
+
+	KDPRINTF("desired physmem=0x%lx\n", physmem_tunable);
+	for (i = 0, j = 0; i < sz; i++) {
+		vm_paddr_t start = mra[i].mr_start;
+		uint64_t size = mra[i].mr_size;
+		CTR2(KTR_PMAP, "start=%#lx size=%#lx\n", mra[i].mr_start, mra[i].mr_size);
+		KDPRINTF("start=%#lx size=%#lx\n", mra[i].mr_start, mra[i].mr_size);
+		if (nucleus_memory_start == mra[i].mr_start) {
+			mra[i].mr_start += 2*PAGE_SIZE_4M;
+			mra[i].mr_size -= 2*PAGE_SIZE_4M;
+		}
+		if (nucleus_memory_start == (start + size - 2*PAGE_SIZE_4M)) 
+			mra[i].mr_size -= 2*PAGE_SIZE_4M;
 
-				if (pa == phys_avail[j]) {
-					phys_avail_tmp[k] = phys_avail[j] + PAGE_SIZE_4M;
-					phys_avail_tmp[k + 1] = phys_avail[j + 1];
-					break;
-				} else if (phys_avail[j] < pa && pa + PAGE_SIZE_4M == phys_avail[j + 1]) {
-					phys_avail_tmp[k] = phys_avail[j];
-					phys_avail_tmp[k + 1] = pa;
-				} else if (phys_avail[j] < pa && pa + PAGE_SIZE_4M < phys_avail[j + 1]) {
-					phys_avail_tmp[k] = phys_avail[j];
-					phys_avail_tmp[k + 1] = pa;
-					phys_avail_tmp[k + 2] = pa + PAGE_SIZE_4M;
-					phys_avail_tmp[k + 3] = phys_avail[j + 1];
-					k += 2;
-				} else {
-					phys_avail_tmp[k] = phys_avail[j];
-					phys_avail_tmp[k + 1] = phys_avail[j + 1];
-				}
+		if ((nucleus_memory_start > start) && (nucleus_memory_start < (start + size))) {
+			uint64_t firstsize = (nucleus_memory_start - start);
+			phys_avail[j] = start;
+			if ((physmem_tunable != 0) && ((physsz + firstsize) > physmem_tunable)) {
+				phys_avail[j+1] = start + (physmem_tunable - physsz);
+				physsz = physmem_tunable;
+				break;
 			}
-
-			for (j = 0; phys_avail_tmp[j + 2] != 0; phys_avail[j] = phys_avail_tmp[j],
-				     phys_avail[j + 1] = phys_avail_tmp[j + 1], j += 2)
-				;
-		}  
+			phys_avail[j+1] = nucleus_memory_start;
+			size = size - firstsize - 2*PAGE_SIZE_4M;
+			mra[i].mr_start =  nucleus_memory_start + 2*PAGE_SIZE_4M;
+			mra[i].mr_size = size;
+			physsz += firstsize + 2*PAGE_SIZE_4M;
+			j += 2;
+		}
+		if (mra[i].mr_size < PAGE_SIZE_4M)
+			continue;
+		if ((mra[i].mr_start & PAGE_MASK_4M) && (mra[i].mr_size < 2*PAGE_SIZE_4M))
+			continue;
+		if (mra[i].mr_start & PAGE_MASK_4M) {
+			uint64_t newstart, roundup;
+			newstart = ((mra[i].mr_start + (PAGE_SIZE_4M-1)) & ~PAGE_MASK_4M);
+			roundup = newstart - mra[i].mr_start;
+			mra[i].mr_size -= roundup;
+			mra[i].mr_start = newstart;
+		}
+		mra[i].mr_size &= ~PAGE_MASK_4M;
+		phys_avail[j] = mra[i].mr_start;
+		if (physmem_tunable != 0 && ((physsz + mra[i].mr_size) >= physmem_tunable)) {
+			size = physmem_tunable - physsz;
+			phys_avail[j + 1] = mra[i].mr_start + size;
+			physsz = physmem_tunable;
+			break;
+		}
+		phys_avail[j + 1] = mra[i].mr_start + mra[i].mr_size;
+		physsz += mra[i].mr_size;
+		j += 2;
 	}
+	phys_avail_start = phys_avail[0];
+	physmem = btoc(physsz);
+	
 
+	for (i = 0; phys_avail[i] != 0; i += 2)
+		KDPRINTF("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
+			i, phys_avail[i], i+1, phys_avail[i+1]);
 	/*
 	 * Calculate the size of kernel virtual memory, and the size and mask
 	 * for the kernel tsb.
@@ -609,7 +626,7 @@
 	 * currently (not by design) used for permanent mappings
 	 */
 	
-	tsb_4m_size = (virtsz >> (PAGE_SHIFT_4M - TTE_SHIFT));
+	tsb_4m_size = (virtsz >> (PAGE_SHIFT_4M - TTE_SHIFT)) << 3;
 	pa = pmap_bootstrap_alloc(tsb_4m_size);
 
 	KDPRINTF("tsb_4m_pa is 0x%lx tsb_4m_size is 0x%lx\n", pa, tsb_4m_size);
@@ -684,22 +701,35 @@
 		}
 	}
 
+	error = hv_set_ctx0(MAX_TSB_INFO, vtophys((vm_offset_t)&kernel_td));
+	if (error != H_EOK)
+		panic("failed to set ctx0 TSBs error: %ld", error);
 
+#ifdef SMP
+	mp_set_tsb_desc_ra(vtophys((vm_offset_t)&kernel_td));
+#endif
 	/*
 	 * setup direct mappings
 	 * 
 	 */
-	pa = PAGE_SIZE_4M;
-	for (i = 0; phys_avail[i] != 0; i += 2) 
-		for (; pa < phys_avail[i + 1]; pa += PAGE_SIZE_4M) 
+	i = 0;
+	pa = phys_avail_start;
+	do {
+		for (; pa < phys_avail[i + 1]; pa += PAGE_SIZE_4M) {
+			tsb_assert_invalid(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa));
 			tsb_set_tte_real(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa), 
 					 pa | TTE_KERNEL | VTD_4M, 0);
-	
-        for (i = 0; i < 2; i++) {
+				
+		}
+		i += 2;
+		pa = phys_avail[i];
+	} while (pa != 0);
+
+	for (i = 0; i < 2; i++) {
                 pa = nucleus_mappings[i];
+		tsb_assert_invalid(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa));
                 tsb_set_tte_real(&kernel_td[TSB4M_INDEX], TLB_PHYS_TO_DIRECT(pa),
 				 pa | TTE_KERNEL | VTD_4M, 0);
-
 	}
 	/*
 	 * Get the available physical memory ranges from /memory/reg. These
@@ -722,14 +752,6 @@
 
 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
 
-	error = hv_set_ctx0(MAX_TSB_INFO, vtophys((vm_offset_t)&kernel_td));
-	if (error != H_EOK)
-		panic("failed to set ctx0 TSBs error: %ld", error);
-
-#ifdef SMP
-	mp_set_tsb_desc_ra(vtophys((vm_offset_t)&kernel_td));
-#endif
-
 	/* 
 	 * This could happen earlier - but I put it here to avoid 
 	 * attempts to do updates until they're legal



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200607022058.k62Kwjvl082737>