Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 11 Sep 2017 08:48:36 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r323431 - in stable/11/sys: amd64/amd64 amd64/include amd64/vmm/intel sys x86/x86
Message-ID:  <201709110848.v8B8maIj026041@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Mon Sep 11 08:48:36 2017
New Revision: 323431
URL: https://svnweb.freebsd.org/changeset/base/323431

Log:
  MFC r322762, r322799, r322832, r322833:
  Make WRFSBASE and WRGSBASE instructions functional.
  
  Bump stable/11 __FreeBSD_version.

Modified:
  stable/11/sys/amd64/amd64/cpu_switch.S
  stable/11/sys/amd64/amd64/exception.S
  stable/11/sys/amd64/amd64/machdep.c
  stable/11/sys/amd64/amd64/ptrace_machdep.c
  stable/11/sys/amd64/amd64/sys_machdep.c
  stable/11/sys/amd64/amd64/vm_machdep.c
  stable/11/sys/amd64/include/asmacros.h
  stable/11/sys/amd64/include/pcb.h
  stable/11/sys/amd64/vmm/intel/vmx_msr.c
  stable/11/sys/sys/param.h
  stable/11/sys/x86/x86/identcpu.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- stable/11/sys/amd64/amd64/cpu_switch.S	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/amd64/cpu_switch.S	Mon Sep 11 08:48:36 2017	(r323431)
@@ -87,7 +87,6 @@ END(cpu_throw)
 ENTRY(cpu_switch)
 	/* Switch to new thread.  First, save context. */
 	movq	TD_PCB(%rdi),%r8
-	orl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
 
 	movq	(%rsp),%rax			/* Hardware registers */
 	movq	%r15,PCB_R15(%r8)
@@ -99,6 +98,30 @@ ENTRY(cpu_switch)
 	movq	%rbx,PCB_RBX(%r8)
 	movq	%rax,PCB_RIP(%r8)
 
+	testl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
+	jnz	2f
+	orl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
+	testl	$TDP_KTHREAD,TD_PFLAGS(%rdi)
+	jnz	2f
+	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+	jz	2f
+	movl	%fs,%eax
+	cmpl	$KUF32SEL,%eax
+	jne	1f
+	rdfsbaseq %rax
+	movq	%rax,PCB_FSBASE(%r8)
+1:	movl	%gs,%eax
+	cmpl	$KUG32SEL,%eax
+	jne	2f
+	movq	%rdx,%r12
+	movl	$MSR_KGSBASE,%ecx		/* Read user gs base */
+	rdmsr
+	shlq	$32,%rdx
+	orq	%rdx,%rax
+	movq	%rax,PCB_GSBASE(%r8)
+	movq	%r12,%rdx
+
+2:
 	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
 	jnz	store_dr			/* static predict not taken */
 done_store_dr:

Modified: stable/11/sys/amd64/amd64/exception.S
==============================================================================
--- stable/11/sys/amd64/amd64/exception.S	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/amd64/exception.S	Mon Sep 11 08:48:36 2017	(r323431)
@@ -187,12 +187,13 @@ alltraps_testi:
 	jz	alltraps_pushregs_no_rdi
 	sti
 alltraps_pushregs_no_rdi:
-	movq	%rsi,TF_RSI(%rsp)
 	movq	%rdx,TF_RDX(%rsp)
+	movq	%rax,TF_RAX(%rsp)
+alltraps_pushregs_no_rax:
+	movq	%rsi,TF_RSI(%rsp)
 	movq	%rcx,TF_RCX(%rsp)
 	movq	%r8,TF_R8(%rsp)
 	movq	%r9,TF_R9(%rsp)
-	movq	%rax,TF_RAX(%rsp)
 	movq	%rbx,TF_RBX(%rsp)
 	movq	%rbp,TF_RBP(%rsp)
 	movq	%r10,TF_R10(%rsp)
@@ -326,31 +327,53 @@ IDTVEC(prot)
 prot_addrf:
 	movq	$0,TF_ADDR(%rsp)
 	movq	%rdi,TF_RDI(%rsp)	/* free up a GP register */
+	movq	%rax,TF_RAX(%rsp)
+	movq	%rdx,TF_RDX(%rsp)
+	movw	%fs,TF_FS(%rsp)
+	movw	%gs,TF_GS(%rsp)
 	leaq	doreti_iret(%rip),%rdi
 	cmpq	%rdi,TF_RIP(%rsp)
-	je	1f			/* kernel but with user gsbase!! */
+	je	5f			/* kernel but with user gsbase!! */
 	testb	$SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */
-	jz	2f			/* already running with kernel GS.base */
-1:	swapgs
-2:	movq	PCPU(CURPCB),%rdi
-	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* always full iret from GPF */
-	movw	%fs,TF_FS(%rsp)
-	movw	%gs,TF_GS(%rsp)
+	jz	6f			/* already running with kernel GS.base */
+	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+	jz	2f
+	cmpw	$KUF32SEL,TF_FS(%rsp)
+	jne	1f
+	rdfsbaseq %rax
+1:	cmpw	$KUG32SEL,TF_GS(%rsp)
+	jne	2f
+	rdgsbaseq %rdx
+2:	swapgs
+	movq	PCPU(CURPCB),%rdi
+	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+	jz	4f
+	cmpw	$KUF32SEL,TF_FS(%rsp)
+	jne	3f
+	movq	%rax,PCB_FSBASE(%rdi)
+3:	cmpw	$KUG32SEL,TF_GS(%rsp)
+	jne	4f
+	movq	%rdx,PCB_GSBASE(%rdi)
+4:	orl	$PCB_FULL_IRET,PCB_FLAGS(%rdi)	/* always full iret from GPF */
 	movw	%es,TF_ES(%rsp)
 	movw	%ds,TF_DS(%rsp)
 	testl	$PSL_I,TF_RFLAGS(%rsp)
-	jz	alltraps_pushregs_no_rdi
+	jz	alltraps_pushregs_no_rax
 	sti
-	jmp	alltraps_pushregs_no_rdi
+	jmp	alltraps_pushregs_no_rax
 
+5:	swapgs
+6:	movq	PCPU(CURPCB),%rdi
+	jmp	4b
+
 /*
  * Fast syscall entry point.  We enter here with just our new %cs/%ss set,
  * and the new privilige level.  We are still running on the old user stack
  * pointer.  We have to juggle a few things around to find our stack etc.
  * swapgs gives us access to our PCPU space only.
  *
- * We do not support invoking this from a custom %cs or %ss (e.g. using
- * entries from an LDT).
+ * We do not support invoking this with custom segment registers,
+ * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT.
  */
 IDTVEC(fast_syscall)
 	swapgs
@@ -503,6 +526,23 @@ IDTVEC(nmi)
 nmi_fromuserspace:
 	incl	%ebx
 	swapgs
+	testb	$CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip)
+	jz	2f
+	movq	PCPU(CURPCB),%rdi
+	testq	%rdi,%rdi
+	jz	2f
+	cmpw	$KUF32SEL,TF_FS(%rsp)
+	jne	1f
+	rdfsbaseq %rax
+	movq	%rax,PCB_FSBASE(%rdi)
+1:	cmpw	$KUG32SEL,TF_GS(%rsp)
+	jne	2f
+	movl	$MSR_KGSBASE,%ecx
+	rdmsr
+	shlq	$32,%rdx
+	orq	%rdx,%rax
+	movq	%rax,PCB_GSBASE(%rdi)
+2:
 /* Note: this label is also used by ddb and gdb: */
 nmi_calltrap:
 	FAKE_MCOUNT(TF_RIP(%rsp))
@@ -705,6 +745,7 @@ doreti_exit:
 	jz	ld_regs
 	testl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
 	jz	ld_regs
+	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r8)
 	testl	$TF_HASSEGS,TF_FLAGS(%rsp)
 	je	set_segs
 

Modified: stable/11/sys/amd64/amd64/machdep.c
==============================================================================
--- stable/11/sys/amd64/amd64/machdep.c	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/amd64/machdep.c	Mon Sep 11 08:48:36 2017	(r323431)
@@ -379,6 +379,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask
 	sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
 	get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
 	fpstate_drop(td);
+	update_pcb_bases(pcb);
 	sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase;
 	sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase;
 	bzero(sf.sf_uc.uc_mcontext.mc_spare,
@@ -449,7 +450,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask
 	regs->tf_fs = _ufssel;
 	regs->tf_gs = _ugssel;
 	regs->tf_flags = TF_HASSEGS;
-	set_pcb_flags(pcb, PCB_FULL_IRET);
 	PROC_LOCK(p);
 	mtx_lock(&psp->ps_mtx);
 }
@@ -555,6 +555,7 @@ sys_sigreturn(td, uap)
 		return (ret);
 	}
 	bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs));
+	update_pcb_bases(pcb);
 	pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase;
 	pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase;
 
@@ -566,7 +567,6 @@ sys_sigreturn(td, uap)
 #endif
 
 	kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0);
-	set_pcb_flags(pcb, PCB_FULL_IRET);
 	return (EJUSTRETURN);
 }
 
@@ -594,11 +594,11 @@ exec_setregs(struct thread *td, struct image_params *i
 	else
 		mtx_unlock(&dt_lock);
 	
+	update_pcb_bases(pcb);
 	pcb->pcb_fsbase = 0;
 	pcb->pcb_gsbase = 0;
 	clear_pcb_flags(pcb, PCB_32BIT);
 	pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
-	set_pcb_flags(pcb, PCB_FULL_IRET);
 
 	bzero((char *)regs, sizeof(struct trapframe));
 	regs->tf_rip = imgp->entry_addr;
@@ -2142,6 +2142,7 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int f
 	mcp->mc_flags = tp->tf_flags;
 	mcp->mc_len = sizeof(*mcp);
 	get_fpcontext(td, mcp, NULL, 0);
+	update_pcb_bases(pcb);
 	mcp->mc_fsbase = pcb->pcb_fsbase;
 	mcp->mc_gsbase = pcb->pcb_gsbase;
 	mcp->mc_xfpustate = 0;
@@ -2212,11 +2213,11 @@ set_mcontext(struct thread *td, mcontext_t *mcp)
 		tp->tf_fs = mcp->mc_fs;
 		tp->tf_gs = mcp->mc_gs;
 	}
+	set_pcb_flags(pcb, PCB_FULL_IRET);
 	if (mcp->mc_flags & _MC_HASBASES) {
 		pcb->pcb_fsbase = mcp->mc_fsbase;
 		pcb->pcb_gsbase = mcp->mc_gsbase;
 	}
-	set_pcb_flags(pcb, PCB_FULL_IRET);
 	return (0);
 }
 
@@ -2485,6 +2486,71 @@ user_dbreg_trap(void)
          * None of the breakpoints are in user space.
          */
         return 0;
+}
+
+/*
+ * The pcb_flags is only modified by current thread, or by other threads
+ * when current thread is stopped.  However, current thread may change it
+ * from the interrupt context in cpu_switch(), or in the trap handler.
+ * When we read-modify-write pcb_flags from C sources, compiler may generate
+ * code that is not atomic regarding the interrupt handler.  If a trap or
+ * interrupt happens and any flag is modified from the handler, it can be
+ * clobbered with the cached value later.  Therefore, we implement setting
+ * and clearing flags with single-instruction functions, which do not race
+ * with possible modification of the flags from the trap or interrupt context,
+ * because traps and interrupts are executed only on instruction boundary.
+ */
+void
+set_pcb_flags_raw(struct pcb *pcb, const u_int flags)
+{
+
+	__asm __volatile("orl %1,%0"
+	    : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
+	    : "cc", "memory");
+
+}
+
+/*
+ * The support for RDFSBASE, WRFSBASE and similar instructions for %gs
+ * base requires that kernel saves MSR_FSBASE and MSR_{K,}GSBASE into
+ * pcb if user space modified the bases.  We must save on the context
+ * switch or if the return to usermode happens through the doreti.
+ *
+ * Tracking of both events is performed by the pcb flag PCB_FULL_IRET,
+ * which has the consequence that the base MSRs must be saved each time
+ * the PCB_FULL_IRET flag is set.  We disable interrupts to sync with
+ * context switches.
+ */
+void
+set_pcb_flags(struct pcb *pcb, const u_int flags)
+{
+	register_t r;
+
+	if (curpcb == pcb &&
+	    (flags & PCB_FULL_IRET) != 0 &&
+	    (pcb->pcb_flags & PCB_FULL_IRET) == 0 &&
+	    (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) {
+		r = intr_disable();
+		if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) {
+			if (rfs() == _ufssel)
+				pcb->pcb_fsbase = rdfsbase();
+			if (rgs() == _ugssel)
+				pcb->pcb_gsbase = rdmsr(MSR_KGSBASE);
+		}
+		set_pcb_flags_raw(pcb, flags);
+		intr_restore(r);
+	} else {
+		set_pcb_flags_raw(pcb, flags);
+	}
+}
+
+void
+clear_pcb_flags(struct pcb *pcb, const u_int flags)
+{
+
+	__asm __volatile("andl %1,%0"
+	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
+	    : "cc", "memory");
 }
 
 #ifdef KDB

Modified: stable/11/sys/amd64/amd64/ptrace_machdep.c
==============================================================================
--- stable/11/sys/amd64/amd64/ptrace_machdep.c	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/amd64/ptrace_machdep.c	Mon Sep 11 08:48:36 2017	(r323431)
@@ -117,15 +117,17 @@ cpu_ptrace_xstate(struct thread *td, int req, void *ad
 static void
 cpu_ptrace_setbase(struct thread *td, int req, register_t r)
 {
+	struct pcb *pcb;
 
+	pcb = td->td_pcb;
+	set_pcb_flags(pcb, PCB_FULL_IRET);
 	if (req == PT_SETFSBASE) {
-		td->td_pcb->pcb_fsbase = r;
+		pcb->pcb_fsbase = r;
 		td->td_frame->tf_fs = _ufssel;
 	} else {
-		td->td_pcb->pcb_gsbase = r;
+		pcb->pcb_gsbase = r;
 		td->td_frame->tf_gs = _ugssel;
 	}
-	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 }
 
 #ifdef COMPAT_FREEBSD32
@@ -136,6 +138,7 @@ static int
 cpu32_ptrace(struct thread *td, int req, void *addr, int data)
 {
 	struct savefpu *fpstate;
+	struct pcb *pcb;
 	uint32_t r;
 	int error;
 
@@ -167,8 +170,10 @@ cpu32_ptrace(struct thread *td, int req, void *addr, i
 			error = EINVAL;
 			break;
 		}
-		r = req == PT_GETFSBASE ? td->td_pcb->pcb_fsbase :
-		    td->td_pcb->pcb_gsbase;
+		pcb = td->td_pcb;
+		if (td == curthread)
+			update_pcb_bases(pcb);
+		r = req == PT_GETFSBASE ? pcb->pcb_fsbase : pcb->pcb_gsbase;
 		error = copyout(&r, addr, sizeof(r));
 		break;
 
@@ -197,6 +202,7 @@ int
 cpu_ptrace(struct thread *td, int req, void *addr, int data)
 {
 	register_t *r, rv;
+	struct pcb *pcb;
 	int error;
 
 #ifdef COMPAT_FREEBSD32
@@ -221,8 +227,10 @@ cpu_ptrace(struct thread *td, int req, void *addr, int
 
 	case PT_GETFSBASE:
 	case PT_GETGSBASE:
-		r = req == PT_GETFSBASE ? &td->td_pcb->pcb_fsbase :
-		    &td->td_pcb->pcb_gsbase;
+		pcb = td->td_pcb;
+		if (td == curthread)
+			update_pcb_bases(pcb);
+		r = req == PT_GETFSBASE ? &pcb->pcb_fsbase : &pcb->pcb_gsbase;
 		error = copyout(r, addr, sizeof(*r));
 		break;
 

Modified: stable/11/sys/amd64/amd64/sys_machdep.c
==============================================================================
--- stable/11/sys/amd64/amd64/sys_machdep.c	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/amd64/sys_machdep.c	Mon Sep 11 08:48:36 2017	(r323431)
@@ -256,39 +256,45 @@ sysarch(td, uap)
 		error = amd64_set_ioperm(td, &iargs);
 		break;
 	case I386_GET_FSBASE:
+		update_pcb_bases(pcb);
 		i386base = pcb->pcb_fsbase;
 		error = copyout(&i386base, uap->parms, sizeof(i386base));
 		break;
 	case I386_SET_FSBASE:
 		error = copyin(uap->parms, &i386base, sizeof(i386base));
 		if (!error) {
+			set_pcb_flags(pcb, PCB_FULL_IRET);
 			pcb->pcb_fsbase = i386base;
 			td->td_frame->tf_fs = _ufssel;
 			update_gdt_fsbase(td, i386base);
 		}
 		break;
 	case I386_GET_GSBASE:
+		update_pcb_bases(pcb);
 		i386base = pcb->pcb_gsbase;
 		error = copyout(&i386base, uap->parms, sizeof(i386base));
 		break;
 	case I386_SET_GSBASE:
 		error = copyin(uap->parms, &i386base, sizeof(i386base));
 		if (!error) {
+			set_pcb_flags(pcb, PCB_FULL_IRET);
 			pcb->pcb_gsbase = i386base;
 			td->td_frame->tf_gs = _ugssel;
 			update_gdt_gsbase(td, i386base);
 		}
 		break;
 	case AMD64_GET_FSBASE:
-		error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase));
+		update_pcb_bases(pcb);
+		error = copyout(&pcb->pcb_fsbase, uap->parms,
+		    sizeof(pcb->pcb_fsbase));
 		break;
 		
 	case AMD64_SET_FSBASE:
 		error = copyin(uap->parms, &a64base, sizeof(a64base));
 		if (!error) {
 			if (a64base < VM_MAXUSER_ADDRESS) {
-				pcb->pcb_fsbase = a64base;
 				set_pcb_flags(pcb, PCB_FULL_IRET);
+				pcb->pcb_fsbase = a64base;
 				td->td_frame->tf_fs = _ufssel;
 			} else
 				error = EINVAL;
@@ -296,15 +302,17 @@ sysarch(td, uap)
 		break;
 
 	case AMD64_GET_GSBASE:
-		error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase));
+		update_pcb_bases(pcb);
+		error = copyout(&pcb->pcb_gsbase, uap->parms,
+		    sizeof(pcb->pcb_gsbase));
 		break;
 
 	case AMD64_SET_GSBASE:
 		error = copyin(uap->parms, &a64base, sizeof(a64base));
 		if (!error) {
 			if (a64base < VM_MAXUSER_ADDRESS) {
-				pcb->pcb_gsbase = a64base;
 				set_pcb_flags(pcb, PCB_FULL_IRET);
+				pcb->pcb_gsbase = a64base;
 				td->td_frame->tf_gs = _ugssel;
 			} else
 				error = EINVAL;

Modified: stable/11/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- stable/11/sys/amd64/amd64/vm_machdep.c	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/amd64/vm_machdep.c	Mon Sep 11 08:48:36 2017	(r323431)
@@ -176,6 +176,7 @@ cpu_fork(td1, p2, td2, flags)
 
 	/* Ensure that td1's pcb is up to date. */
 	fpuexit(td1);
+	update_pcb_bases(td1->td_pcb);
 
 	/* Point the pcb to the top of the stack */
 	pcb2 = get_pcb_td(td2);
@@ -242,7 +243,7 @@ cpu_fork(td1, p2, td2, flags)
 	pcb2->pcb_tssp = NULL;
 
 	/* New segment registers. */
-	set_pcb_flags(pcb2, PCB_FULL_IRET);
+	set_pcb_flags_raw(pcb2, PCB_FULL_IRET);
 
 	/* Copy the LDT, if necessary. */
 	mdp1 = &td1->td_proc->p_md;
@@ -437,13 +438,14 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
 	 * Those not loaded individually below get their default
 	 * values here.
 	 */
+	update_pcb_bases(td0->td_pcb);
 	bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
 	clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE |
 	    PCB_KERNFPU);
 	pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
 	bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
 	    cpu_max_ext_state_size);
-	set_pcb_flags(pcb2, PCB_FULL_IRET);
+	set_pcb_flags_raw(pcb2, PCB_FULL_IRET);
 
 	/*
 	 * Create a new fresh stack for the new thread.

Modified: stable/11/sys/amd64/include/asmacros.h
==============================================================================
--- stable/11/sys/amd64/include/asmacros.h	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/include/asmacros.h	Mon Sep 11 08:48:36 2017	(r323431)
@@ -177,7 +177,12 @@
 	movw	%es,TF_ES(%rsp) ;					\
 	movw	%ds,TF_DS(%rsp) ;					\
 	movl	$TF_HASSEGS,TF_FLAGS(%rsp) ;				\
-	cld
+	cld ;								\
+	testb	$SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel ? */	\
+	jz	2f ;		/* yes, leave PCB_FULL_IRET alone */	\
+	movq	PCPU(CURPCB),%r8 ;					\
+	andl	$~PCB_FULL_IRET,PCB_FLAGS(%r8) ;			\
+2:
 
 #define POP_FRAME							\
 	movq	TF_RDI(%rsp),%rdi ;					\

Modified: stable/11/sys/amd64/include/pcb.h
==============================================================================
--- stable/11/sys/amd64/include/pcb.h	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/include/pcb.h	Mon Sep 11 08:48:36 2017	(r323431)
@@ -119,40 +119,15 @@ struct susppcb {
 #ifdef _KERNEL
 struct trapframe;
 
-/*
- * The pcb_flags is only modified by current thread, or by other threads
- * when current thread is stopped.  However, current thread may change it
- * from the interrupt context in cpu_switch(), or in the trap handler.
- * When we read-modify-write pcb_flags from C sources, compiler may generate
- * code that is not atomic regarding the interrupt handler.  If a trap or
- * interrupt happens and any flag is modified from the handler, it can be
- * clobbered with the cached value later.  Therefore, we implement setting
- * and clearing flags with single-instruction functions, which do not race
- * with possible modification of the flags from the trap or interrupt context,
- * because traps and interrupts are executed only on instruction boundary.
- */
-static __inline void
-set_pcb_flags(struct pcb *pcb, const u_int flags)
-{
-
-	__asm __volatile("orl %1,%0"
-	    : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags)
-	    : "cc");
-}
-
-static __inline void
-clear_pcb_flags(struct pcb *pcb, const u_int flags)
-{
-
-	__asm __volatile("andl %1,%0"
-	    : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags)
-	    : "cc");
-}
-
+void	clear_pcb_flags(struct pcb *pcb, const u_int flags);
 void	makectx(struct trapframe *, struct pcb *);
+void	set_pcb_flags(struct pcb *pcb, const u_int flags);
+void	set_pcb_flags_raw(struct pcb *pcb, const u_int flags);
 int	savectx(struct pcb *) __returns_twice;
 void	resumectx(struct pcb *);
 
+/* Ensure that pcb_gsbase and pcb_fsbase are up to date */
+#define	update_pcb_bases(pcb)	set_pcb_flags((pcb), PCB_FULL_IRET)
 #endif
 
 #endif /* _AMD64_PCB_H_ */

Modified: stable/11/sys/amd64/vmm/intel/vmx_msr.c
==============================================================================
--- stable/11/sys/amd64/vmm/intel/vmx_msr.c	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/amd64/vmm/intel/vmx_msr.c	Mon Sep 11 08:48:36 2017	(r323431)
@@ -31,10 +31,12 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/proc.h>
 
 #include <machine/clock.h>
 #include <machine/cpufunc.h>
 #include <machine/md_var.h>
+#include <machine/pcb.h>
 #include <machine/specialreg.h>
 #include <machine/vmm.h>
 
@@ -356,7 +358,8 @@ vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
 {
 	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];
 
-	/* Save host MSRs (if any) and restore guest MSRs */
+	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
+	update_pcb_bases(curpcb);
 	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
 	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
 	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);

Modified: stable/11/sys/sys/param.h
==============================================================================
--- stable/11/sys/sys/param.h	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/sys/param.h	Mon Sep 11 08:48:36 2017	(r323431)
@@ -58,7 +58,7 @@
  *		in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1101502	/* Master, propagated to newvers */
+#define __FreeBSD_version 1101503	/* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
@@ -84,6 +84,8 @@
 #define	P_OSREL_SHUTDOWN_ENOTCONN	1100077
 #define	P_OSREL_MAP_GUARD		1200035
 #define	P_OSREL_MAP_GUARD_11		1101501
+#define	P_OSREL_WRFSBASE		1200041
+#define	P_OSREL_WRFSBASE_11		1101503
 
 #define	P_OSREL_MAJOR(x)		((x) / 100000)
 #endif

Modified: stable/11/sys/x86/x86/identcpu.c
==============================================================================
--- stable/11/sys/x86/x86/identcpu.c	Mon Sep 11 08:18:56 2017	(r323430)
+++ stable/11/sys/x86/x86/identcpu.c	Mon Sep 11 08:48:36 2017	(r323431)
@@ -1423,18 +1423,15 @@ finishidentcpu(void)
 		cpu_stdext_feature = regs[1];
 
 		/*
-		 * Some hypervisors fail to filter out unsupported
-		 * extended features.  For now, disable the
+		 * Some hypervisors failed to filter out unsupported
+		 * extended features.  Allow disabling the
 		 * extensions, activation of which requires setting a
 		 * bit in CR4, and which VM monitors do not support.
 		 */
-		if (cpu_feature2 & CPUID2_HV) {
-			cpu_stdext_disable = CPUID_STDEXT_FSGSBASE |
-			    CPUID_STDEXT_SMEP;
-		} else
-			cpu_stdext_disable = 0;
+		cpu_stdext_disable = 0;
 		TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable);
 		cpu_stdext_feature &= ~cpu_stdext_disable;
+
 		cpu_stdext_feature2 = regs[2];
 	}
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201709110848.v8B8maIj026041>