Date:      Mon, 25 Mar 2013 13:33:06 +0000 (UTC)
From:      "Cherry G. Mathew" <cherry@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r248703 - in projects/amd64_xen_pv/sys/amd64: amd64 include xen
Message-ID:  <201303251333.r2PDX6a9028719@svn.freebsd.org>

Author: cherry
Date: Mon Mar 25 13:33:06 2013
New Revision: 248703
URL: http://svnweb.freebsd.org/changeset/base/248703

Log:
  This commit combines the following features:
   i) Context switch now DTRT and maps userspace into a separate kernel VA.
   ii) Supporting pmap_xxx() functions for boot progress have been added.
   iii) Boottime "dummy" user page tables have been temporarily disabled.
  
   Approved by: gibbs (implicit)

Modified:
  projects/amd64_xen_pv/sys/amd64/amd64/cpu_switch.S
  projects/amd64_xen_pv/sys/amd64/amd64/vm_machdep.c
  projects/amd64_xen_pv/sys/amd64/include/pmap.h
  projects/amd64_xen_pv/sys/amd64/xen/mm.c
  projects/amd64_xen_pv/sys/amd64/xen/pmap.c

Modified: projects/amd64_xen_pv/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/amd64/cpu_switch.S	Mon Mar 25 13:23:47 2013	(r248702)
+++ projects/amd64_xen_pv/sys/amd64/amd64/cpu_switch.S	Mon Mar 25 13:33:06 2013	(r248703)
@@ -138,7 +138,6 @@ ctx_switch_xsave:
 
 	/* Save is done.  Now fire up new thread. Leave old vmspace. */
 	movq	TD_PCB(%rsi),%r8
-
 	/* switch address space */
 	movq	PCB_CR3(%r8),%rcx
 	movq	%cr3,%rax
@@ -155,9 +154,23 @@ swinact:
 	pushq	%rdx
 	pushq	%r8
 
-	movq	%rcx, %rdi
-	callq	xen_pt_switch
+	/*
+	 * On xen, the hypervisor loads %cr3 for us on return to
+	 * userland. We use a separate "kernel space" for kernel mode,
+	 * which is set up at boot time (see: pmap.c:pmap_bootstrap)
+	 *
+	 * We need to tell the hypervisor via xen_pt_user_switch()
+	 * about the new user pmap. Additionally, we modify the kernel VA
+	 * space by copying in the userland bits of the new pmap, in
+	 * case the kernel needs to access them.
+	 */
 
+	movq	TD_PROC(%rsi), %rdx		/* newproc */
+	movq	P_VMSPACE(%rdx), %rdx
+	addq	$VM_PMAP, %rdx
+	movq	%rdx, %rdi
+	callq	pmap_xen_userload
+	
 	popq	%r8
 	popq	%rdx
 	popq	%rsi

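[Sketch, not part of the commit: read as C, the new swinact sequence above amounts to roughly the following. TD_PROC, P_VMSPACE and VM_PMAP are the assembler offsets for the corresponding struct fields; sketch_user_pt_switch() is an illustrative name only.]

/*
 * Hedged C rendering of the new cpu_switch.S path: %rsi carries the
 * incoming thread, and the offsets walk from the thread to its pmap
 * before calling pmap_xen_userload() (added in pmap.c below).
 */
static void
sketch_user_pt_switch(struct thread *newtd)
{
	pmap_t upmap = vmspace_pmap(newtd->td_proc->p_vmspace);

	/*
	 * Copy the user PML4 slots into the kernel PML4 and tell Xen
	 * about the new user base pointer; the hypervisor then loads
	 * the user %cr3 on the next return to userland.
	 */
	pmap_xen_userload(upmap);
}
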
Modified: projects/amd64_xen_pv/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/amd64/vm_machdep.c	Mon Mar 25 13:23:47 2013	(r248702)
+++ projects/amd64_xen_pv/sys/amd64/amd64/vm_machdep.c	Mon Mar 25 13:33:06 2013	(r248703)
@@ -220,7 +220,7 @@ cpu_fork(td1, p2, td2, flags)
 	 */
 	pmap2 = vmspace_pmap(p2->p_vmspace);
 #ifdef XEN
-	pcb2->pcb_cr3 = pmap_kextract((vm_offset_t)pmap2->pm_pml4);
+	pcb2->pcb_cr3 = pmap_kextract_ma((vm_offset_t)pmap2->pm_pml4);
 #else
 	pcb2->pcb_cr3 = DMAP_TO_PHYS((vm_offset_t)pmap2->pm_pml4);
 #endif

Modified: projects/amd64_xen_pv/sys/amd64/include/pmap.h
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/include/pmap.h	Mon Mar 25 13:23:47 2013	(r248702)
+++ projects/amd64_xen_pv/sys/amd64/include/pmap.h	Mon Mar 25 13:33:06 2013	(r248703)
@@ -365,6 +365,7 @@ void	pmap_invalidate_all(pmap_t);
 void	pmap_invalidate_cache(void);
 void	pmap_invalidate_cache_pages(vm_page_t *pages, int count);
 void	pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva);
+void	pmap_xen_userload(pmap_t);
 
 #endif /* _KERNEL */
 

Modified: projects/amd64_xen_pv/sys/amd64/xen/mm.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/mm.c	Mon Mar 25 13:23:47 2013	(r248702)
+++ projects/amd64_xen_pv/sys/amd64/xen/mm.c	Mon Mar 25 13:33:06 2013	(r248703)
@@ -163,12 +163,13 @@ xen_invlpg(vm_offset_t va)
 inline void
 xen_load_cr3(u_long val)
 {
-	xen_pt_switch(val);
+	xen_pt_switch(xpmap_ptom(val));
 }
 
 void
-xen_pt_switch(vm_paddr_t kpml4phys)
+xen_pt_switch(vm_paddr_t kpml4mach)
 {
+	printk("%s: kpml4mach == 0x%lx\n", __func__, kpml4mach);
 	struct mmuext_op op;
 #ifdef INVARIANTS
 	SET_VCPU();
@@ -176,12 +177,12 @@ xen_pt_switch(vm_paddr_t kpml4phys)
 	KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
 #endif
 	op.cmd = MMUEXT_NEW_BASEPTR;
-	op.arg1.mfn = xpmap_ptom(kpml4phys) >> PAGE_SHIFT;
+	op.arg1.mfn = kpml4mach >> PAGE_SHIFT;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 
 void
-xen_pt_user_switch(vm_paddr_t upml4phys)
+xen_pt_user_switch(vm_paddr_t upml4mach)
 {
 	struct mmuext_op op;
 #ifdef INVARIANTS
@@ -190,7 +191,7 @@ xen_pt_user_switch(vm_paddr_t upml4phys)
 	KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
 #endif
 	op.cmd = MMUEXT_NEW_USER_BASEPTR;
-	op.arg1.mfn = xpmap_ptom(upml4phys) >> PAGE_SHIFT;
+	op.arg1.mfn = upml4mach >> PAGE_SHIFT;
 	PANIC_IF(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
 }
 

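[Sketch, not part of the commit: the net effect of the mm.c hunks is a calling-convention change. xen_pt_switch() and xen_pt_user_switch() now expect a machine address, and callers do the physical-to-machine translation themselves, as the pmap.c changes below show. A minimal hedged illustration, mirroring the pmap_bootstrap() call site; sketch_switch_to_kernel_pt() is an illustrative name only.]

static void
sketch_switch_to_kernel_pt(void)
{
	/* kernel VA -> physical (VTOP) -> machine (xpmap_ptom) */
	vm_paddr_t kpml4ma = xpmap_ptom(VTOP(KPML4phys));

	xen_pt_switch(kpml4ma);	/* issues the MMUEXT_NEW_BASEPTR hypercall */
}
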
Modified: projects/amd64_xen_pv/sys/amd64/xen/pmap.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/pmap.c	Mon Mar 25 13:23:47 2013	(r248702)
+++ projects/amd64_xen_pv/sys/amd64/xen/pmap.c	Mon Mar 25 13:33:06 2013	(r248703)
@@ -132,6 +132,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/smp.h>
 #endif
 
+#include <sys/proc.h>
+#include <sys/sched.h>
+
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
@@ -511,7 +514,7 @@ pmap_xen_bootpages(vm_paddr_t *firstaddr
 
 	HYPERVISOR_shared_info = (void *) va;
 
-
+#if 0
 	/* ii) Userland page table base */
 	va = vallocpages(firstaddr, 1);
 	bzero((void *)va, PAGE_SIZE);
@@ -530,7 +533,8 @@ pmap_xen_bootpages(vm_paddr_t *firstaddr
 	xen_pgdir_pin(phystomach(VTOP(va)));
 
 	/* Register user page table with Xen */
-	xen_pt_user_switch(VTOP(va));
+	xen_pt_user_switch(xpmap_ptom(VTOP(va)));
+#endif
 }
 
 /* Boot time ptov - xen guarantees bootpages to be offset */
@@ -578,7 +582,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	create_boot_pagetables(firstaddr);
 
 	/* Switch to the new kernel tables */
-	xen_pt_switch(VTOP(KPML4phys));
+	xen_pt_switch(xpmap_ptom(VTOP(KPML4phys)));
 
 	/* Unpin old page table hierarchy, and mark all its pages r/w */
 	xen_pgdir_unpin(phystomach(VTOP(xen_start_info->pt_base)));
@@ -774,38 +778,22 @@ pmap_pinit(pmap_t pmap)
 	 */
 	pmap->pm_pml4 = (void *) kmem_alloc(kernel_map, PAGE_SIZE);
 	bzero(pmap->pm_pml4, PAGE_SIZE);
-
+	printf("%s: pmap->pm_pml4 == %p\n", __func__, pmap->pm_pml4);
 	/* 
 	 * We do not wire in kernel space, or the self-referencial
-	 * entry in userspace pmaps for two reasons:
-	 * i)  both kernel and userland run in ring3 (same CPU
-	 *     privilege level). This means that userland that has kernel
-	 *     address space mapped in, can access kernel memory!
-	 *     Instead, we make the kernel pmap is exclusive and
-	 *     unshared, and we switch to it on *every* kernel
-	 *     entry. This is facilitated by the hypervisor.
-	 * ii) we access the user pmap from within kernel VA. The
-	 *     self-referencing entry is useful if we access the pmap
-	 *     from the *user* VA.
-	 * XXX: review this when userland is up.
+	 * entry in userspace pmaps because both kernel and userland
+	 * share ring3 privilege. The user/kernel context switch is
+	 * arbitrated by the hypervisor by means of pre-loaded values
+	 * for kernel and user %cr3. The userland parts of kernel VA
+	 * may be conditionally overlaid with the VA of curthread,
+	 * since the kernel occasionally needs to access userland
+	 * process VA space.
 	 */
 
-#if 1 /* XXX: DEBUG ONLY - EXPOSES KERNEL TO USERLAND - TERRIBLE SECURITY RISK! */
-	/* Wire in kernel global address entries. */
-	pmap->pm_pml4[KPML4I] = phystomach(VTOP(KPDPphys)) | PG_RW | PG_V | PG_U;
-
-	/* Copy over Direct mapping entries, from kernel_pmap. */
-	int i;
-	for (i = 0; i < NDMPML4E; i++) {
-		pmap->pm_pml4[DMPML4I + i] = ((pdp_entry_t *)KPML4phys)[DMPML4I + i];
-	}
-
 	pmap_xen_setpages_ro((uintptr_t)pmap->pm_pml4, 1);
 
 	xen_pgdir_pin(phystomach(ptmb_vtop((uintptr_t)pmap->pm_pml4)));
 
-#endif
-
 	pmap->pm_root = NULL;
 	CPU_ZERO(&pmap->pm_active);
 	pmap_pv_pmap_init(pmap);
@@ -814,14 +802,33 @@ pmap_pinit(pmap_t pmap)
 	return 1;
 }
 
+void pmap_xen_userload(pmap_t pmap)
+{
+	KASSERT(pmap != kernel_pmap, 
+		("Kernel pmap requested on user load.\n"));
+
+	printf("%s: pmap->pm_pml4 == %p\n", __func__, pmap->pm_pml4);
+	printf("%s: curthread %s\n", __func__, curthread->td_name);
+	int i;
+	for (i = 0; i < NUPML4E; i++) {
+		pml4_entry_t pml4e;
+		pml4e = (pmap->pm_pml4[i]);
+		PT_SET_VA_MA((pml4_entry_t *)KPML4phys + i, pml4e, false);
+	}
+	PT_UPDATES_FLUSH();
+
+	/* Tell xen about user pmap switch */
+	xen_pt_user_switch(vtomach(pmap->pm_pml4));
+}
+
 void
 pmap_release(pmap_t pmap)
 {
 	KASSERT(0, ("XXX: %s: TODO\n", __func__));
 }
 
-pt_entry_t *
-vtopte_hold(uintptr_t va, void *addr)
+static pt_entry_t *
+pmap_vtopte_hold(pmap_t pmap, uintptr_t va, void *addr)
 {
 	KASSERT(addr != NULL, ("addr == NULL"));
 
@@ -850,16 +857,28 @@ vtopte_hold(uintptr_t va, void *addr)
 	return pte;
 }
 
-void
-vtopte_release(uintptr_t va, void *addr)
+pt_entry_t *
+vtopte_hold(uintptr_t va, void *addr)
+{
+	return pmap_vtopte_hold(kernel_pmap, va, addr);
+}
+
+static void
+pmap_vtopte_release(pmap_t pmap, uintptr_t va, void *addr)
 {
 	mmu_map_t tptr = *(mmu_map_t *)addr;
 
-	mmu_map_release_va(kernel_pmap, tptr, va);
+	mmu_map_release_va(pmap, tptr, va);
 	mmu_map_t_fini(tptr);
 
 }
 
+void
+vtopte_release(uintptr_t va, void *addr)
+{
+	pmap_vtopte_release(kernel_pmap, va, addr);
+}
+
 #ifdef SMP
 void pmap_lazyfix_action(void);
 
@@ -943,6 +962,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, 
     vm_prot_t prot, boolean_t wired)
 {
 	va = trunc_page(va);
+
 	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
 	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)",
@@ -998,10 +1018,57 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 	pmap_qremove(sva, atop(eva - sva));
 }
 
+static bool
+pv_remove(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+	pt_entry_t *pte, tpte;
+
+	char tbuf[tsz]; /* Safe to do this on the stack since tsz is
+			 * effectively const.
+			 */
+
+	mmu_map_t tptr = tbuf;
+
+	PMAP_LOCK(pmap);
+	pte = pmap_vtopte_hold(pmap, va, &tptr);
+	tpte = *pte;
+	PT_CLEAR_VA(pte, TRUE);
+	if (tpte & PG_A)
+		vm_page_aflag_set(m, PGA_REFERENCED);
+
+	/*
+	 * Update the vm_page_t clean and reference bits.
+	 */
+	if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+		vm_page_dirty(m);
+
+	/* XXX: Tell mmu_xxx about backing page */
+	pmap_vtopte_release(pmap, va, &tptr);
+
+	pmap_invalidate_page(pmap, va);
+	PMAP_UNLOCK(pmap);
+
+	return false;
+}
+
+/*
+ *	Routine:	pmap_remove_all
+ *	Function:
+ *		Removes this physical page from
+ *		all physical maps in which it resides.
+ *		Reflects back modify bits to the pager.
+ */
 void
 pmap_remove_all(vm_page_t m)
 {
-	KASSERT(0, ("XXX: %s: TODO\n", __func__));
+
+	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+	    ("pmap_remove_all: page %p is not managed", m));
+
+	pmap_pv_iterate(m, pv_remove);
+
+	/* free pv entry from all pmaps */
+	pmap_pv_page_unmap(m);
 }
 
 vm_paddr_t 
@@ -1225,7 +1292,21 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	KASSERT(0, ("XXX: %s: TODO\n", __func__));
+
+	vm_offset_t addr;
+	/* XXX: TODO SMP */
+	sched_pin();
+
+	for (addr = sva; addr < eva; addr += PAGE_SIZE)
+		invlpg(addr);
+
+	sched_unpin();
+}
+
+void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+	pmap_invalidate_range(pmap, va, va + PAGE_SIZE);
 }
 
 void
@@ -1288,11 +1369,21 @@ pmap_page_set_memattr(vm_page_t m, vm_me
 	KASSERT(0, ("XXX: %s: TODO\n", __func__));
 }
 
+static bool
+pv_dummy(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+	printf("%s: va == 0x%lx, pa == 0x%lx\n",
+	       __func__, va, VM_PAGE_TO_PHYS(m));
+	return true; /* stop at the first iteration */
+}
+
 boolean_t
 pmap_page_is_mapped(vm_page_t m)
 {
-	KASSERT(0, ("XXX: %s: TODO\n", __func__));
-	return 0;
+	if ((m->oflags & VPO_UNMANAGED) != 0)
+		return (FALSE);
+	printf("pmap_pv_iterate(m, pv_dummy) == %d\n", pmap_pv_iterate(m, pv_dummy));
+	return pmap_pv_iterate(m, pv_dummy);
 }
 
 boolean_t
@@ -1342,10 +1433,55 @@ pmap_clear_reference(vm_page_t m)
 	KASSERT(0, ("XXX: %s: TODO\n", __func__));
 }
 
+/* Callback to remove write access on given va and pmap */
+static bool
+pv_remove_write(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+
+	pt_entry_t oldpte, *pte;
+	char tbuf[tsz]; /* Safe to do this on the stack since tsz is
+			 * effectively const.
+			 */
+
+	mmu_map_t tptr = tbuf;
+
+	PMAP_LOCK(pmap);
+	pte = pmap_vtopte_hold(pmap, va, &tptr);
+
+	oldpte = *pte;
+	if (oldpte & PG_RW) {
+		PT_SET_MA(va, oldpte & ~(PG_RW | PG_M));
+		if ((oldpte & PG_M) != 0)
+			vm_page_dirty(m);
+		pmap_invalidate_page(pmap, va);
+	}
+	pmap_vtopte_release(pmap, va, &tptr);
+	PMAP_UNLOCK(pmap);
+
+	return false; /* Iterate through every mapping */
+}
+
+/*
+ * Clear the write and modified bits in each of the given page's mappings.
+ */
 void
 pmap_remove_write(vm_page_t m)
 {
-	KASSERT(0, ("XXX: %s: TODO\n", __func__));
+	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+	    ("pmap_remove_write: page %p is not managed", m));
+
+	/*
+	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
+	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
+	 * is clear, no page table entries need updating.
+	 */
+	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+	if ((m->oflags & VPO_BUSY) == 0 &&
+	    (m->aflags & PGA_WRITEABLE) == 0)
+		return;
+
+	pmap_pv_iterate(m, pv_remove_write);
+	vm_page_aflag_clear(m, PGA_WRITEABLE);
 }
 
 int

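[Sketch, not part of the commit: the new pmap_remove_all(), pmap_remove_write() and pmap_page_is_mapped() all lean on the same pv-iteration pattern. pmap_pv_iterate(m, cb) invokes a bool cb(pmap_t, vm_offset_t, vm_page_t) callback for every mapping of the page; a callback returning true stops the walk after the current mapping (pv_dummy), while one returning false visits every mapping (pv_remove, pv_remove_write). A hedged illustration of the pattern; pv_visit() and sketch_walk_mappings() are illustrative names only.]

/*
 * Callback invoked once per (pmap, va) mapping of the page m.
 */
static bool
pv_visit(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	/* Inspect or modify the mapping of m at (pmap, va) here. */
	return false;		/* false: keep walking all mappings */
}

static void
sketch_walk_mappings(vm_page_t m)
{
	if ((m->oflags & VPO_UNMANAGED) != 0)
		return;		/* unmanaged pages carry no pv entries */
	(void)pmap_pv_iterate(m, pv_visit);
}
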

