Date:      Thu, 22 Sep 2016 10:59:33 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Slawa Olhovchenkov <slw@zxy.spb.ru>
Cc:        John Baldwin <jhb@freebsd.org>, freebsd-stable@freebsd.org, alc@freebsd.org
Subject:   Re: nginx and FreeBSD11
Message-ID:  <20160922075933.GL38409@kib.kiev.ua>
In-Reply-To: <20160920211517.GJ38409@kib.kiev.ua>
References:  <20160907191348.GD22212@zxy.spb.ru> <1823460.vTm8IvUQsF@ralph.baldwin.cx> <20160918162241.GE2960@zxy.spb.ru> <2122051.7RxZBKUSFc@ralph.baldwin.cx> <20160920065244.GO2840@zxy.spb.ru> <20160920192053.GP2840@zxy.spb.ru> <20160920201925.GI38409@kib.kiev.ua> <20160920203853.GR2840@zxy.spb.ru> <20160920211517.GJ38409@kib.kiev.ua>

On Wed, Sep 21, 2016 at 12:15:17AM +0300, Konstantin Belousov wrote:
> > > diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
> > > index a23468e..f754652 100644
> > > --- a/sys/vm/vm_map.c
> > > +++ b/sys/vm/vm_map.c
> > > @@ -481,6 +481,7 @@ vmspace_switch_aio(struct vmspace *newvm)
> > >  	if (oldvm == newvm)
> > >  		return;
> > >  
> > > +	spinlock_enter();
> > >  	/*
> > >  	 * Point to the new address space and refer to it.
> > >  	 */
> > > @@ -489,6 +490,7 @@ vmspace_switch_aio(struct vmspace *newvm)
> > >  
> > >  	/* Activate the new mapping. */
> > >  	pmap_activate(curthread);
> > > +	spinlock_exit();
> > >  
> > >  	/* Remove the daemon's reference to the old address space. */
> > >  	KASSERT(oldvm->vm_refcnt > 1,
Did you test the patch?

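For context on why the test patch closes the race: spinlock_enter()
disables interrupts on the local CPU, so the invalidate_all IPI cannot
be delivered between the %cr3 reload done by pmap_activate() and the
curpmap update.  A rough sketch of the bracketing (illustrative only,
KASSERTs omitted, not the committed code):

/*
 * Sketch of vmspace_switch_aio() with the test patch applied.
 * spinlock_enter() keeps the invltlb IPI pending while %cr3 and
 * curpmap are transiently inconsistent.
 */
void
vmspace_switch_aio(struct vmspace *newvm)
{
	struct vmspace *oldvm;

	oldvm = curproc->p_vmspace;
	if (oldvm == newvm)
		return;

	spinlock_enter();		/* blocks local interrupt delivery */
	curproc->p_vmspace = newvm;
	atomic_add_int(&newvm->vm_refcnt, 1);
	pmap_activate(curthread);	/* reloads %cr3, updates curpmap */
	spinlock_exit();		/* IPIs can be delivered again */

	vmspace_free(oldvm);		/* drop reference to the old vmspace */
}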
Below is, I believe, the committable fix, assuming of course that the
patch above worked.  If you want to retest it on stable/11, ignore the
efirt.c chunks.

diff --git a/sys/amd64/amd64/efirt.c b/sys/amd64/amd64/efirt.c
index f1d67f7..c883af8 100644
--- a/sys/amd64/amd64/efirt.c
+++ b/sys/amd64/amd64/efirt.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/vmparam.h>
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
@@ -301,6 +302,17 @@ efi_enter(void)
 		PMAP_UNLOCK(curpmap);
 		return (error);
 	}
+
+	/*
+	 * The IPI TLB shootdown handler invltlb_pcid_handler() reloads
+	 * %cr3 from curpmap->pm_cr3, which would disable the mappings
+	 * of the runtime segments.  Block the handler's action by
+	 * setting curpmap to an impossible value.  See also the
+	 * comment in pmap.c:pmap_activate_sw().
+	 */
+	if (pmap_pcid_enabled && !invpcid_works)
+		PCPU_SET(curpmap, NULL);
+
 	load_cr3(VM_PAGE_TO_PHYS(efi_pml4_page) | (pmap_pcid_enabled ?
 	    curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0));
 	/*
@@ -317,7 +329,9 @@ efi_leave(void)
 {
 	pmap_t curpmap;
 
-	curpmap = PCPU_GET(curpmap);
+	curpmap = &curproc->p_vmspace->vm_pmap;
+	if (pmap_pcid_enabled && !invpcid_works)
+		PCPU_SET(curpmap, curpmap);
 	load_cr3(curpmap->pm_cr3 | (pmap_pcid_enabled ?
 	    curpmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid : 0));
 	if (!pmap_pcid_enabled)
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 63042e4..59e1b67 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -6842,6 +6842,7 @@ pmap_activate_sw(struct thread *td)
 {
 	pmap_t oldpmap, pmap;
 	uint64_t cached, cr3;
+	register_t rflags;
 	u_int cpuid;
 
 	oldpmap = PCPU_GET(curpmap);
@@ -6865,16 +6866,43 @@ pmap_activate_sw(struct thread *td)
 		    pmap == kernel_pmap,
 		    ("non-kernel pmap thread %p pmap %p cpu %d pcid %#x",
 		    td, pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid));
+
+		/*
+		 * If the INVPCID instruction is not available,
+		 * invltlb_pcid_handler() is used to handle the
+		 * invalidate_all IPI, and it checks for curpmap ==
+		 * smp_tlb_pmap.  The sequence of operations below
+		 * has a window where %CR3 is loaded with the new
+		 * pmap's PML4 address, but the curpmap value is not
+		 * yet updated.  This causes the invltlb IPI handler,
+		 * if called within the window, to execute as a NOP,
+		 * which leaves stale TLB entries.
+		 *
+		 * Note that the most typical use of
+		 * pmap_activate_sw(), from the context switch, is
+		 * immune to this race, because interrupts are
+		 * disabled (while the thread lock is owned), and the
+		 * IPI happens after curpmap is updated.  Protect
+		 * other callers in a similar way, by disabling
+		 * interrupts around the %cr3 register reload and
+		 * curpmap assignment.
+		 */
+		if (!invpcid_works)
+			rflags = intr_disable();
+
 		if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) {
 			load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid |
 			    cached);
 			if (cached)
 				PCPU_INC(pm_save_cnt);
 		}
+		PCPU_SET(curpmap, pmap);
+		if (!invpcid_works)
+			intr_restore(rflags);
 	} else if (cr3 != pmap->pm_cr3) {
 		load_cr3(pmap->pm_cr3);
+		PCPU_SET(curpmap, pmap);
 	}
-	PCPU_SET(curpmap, pmap);
 #ifdef SMP
 	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
 #else

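For reference, the handler that the efirt.c chunk defends against acts
only when the shootdown target matches the CPU's notion of curpmap, so
parking curpmap at NULL turns it into a NOP for the duration of the EFI
call.  A paraphrased sketch (from memory, not verbatim source; counters
and #ifdefs omitted):

void
invltlb_pcid_handler(void)
{

	if (smp_tlb_pmap == kernel_pmap) {
		invltlb_glob();		/* flush global entries too */
	} else if (PCPU_GET(curpmap) == smp_tlb_pmap) {
		/*
		 * Reload %cr3 to flush the non-global TLB entries for
		 * this pmap's PCID.  A NULL curpmap never matches,
		 * which is what efi_enter() above relies on.
		 */
		load_cr3(smp_tlb_pmap->pm_cr3 |
		    smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid);
	}
	atomic_add_int(&smp_tlb_wait, 1);	/* acknowledge the IPI */
}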

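And the window in pmap_activate_sw() that the second hunk closes, as an
illustrative timeline (PCID enabled, INVPCID not available):

/*
 * CPU n: pmap_activate_sw()          CPU m: shootdown initiator
 * --------------------------         ---------------------------
 * load_cr3(new->pm_cr3 | pcid)
 *                                    smp_tlb_pmap = new pmap
 *                                    send invltlb IPI to all CPUs
 * IPI: curpmap != new -> NOP
 * PCPU_SET(curpmap, new)             shootdown "completes"
 *
 * ... stale TLB entries survive on CPU n ...
 *
 * With the patch, intr_disable() keeps the IPI pending until both the
 * %cr3 reload and the curpmap update are done, so the handler always
 * sees a consistent pair.
 */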
