From owner-svn-src-stable-11@freebsd.org Mon Sep 11 08:48:38 2017 Return-Path: Delivered-To: svn-src-stable-11@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 9D48BE237DD; Mon, 11 Sep 2017 08:48:38 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 6A95A779DD; Mon, 11 Sep 2017 08:48:38 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v8B8mbtY026052; Mon, 11 Sep 2017 08:48:37 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v8B8maIj026041; Mon, 11 Sep 2017 08:48:36 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201709110848.v8B8maIj026041@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Mon, 11 Sep 2017 08:48:36 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r323431 - in stable/11/sys: amd64/amd64 amd64/include amd64/vmm/intel sys x86/x86 X-SVN-Group: stable-11 X-SVN-Commit-Author: kib X-SVN-Commit-Paths: in stable/11/sys: amd64/amd64 amd64/include amd64/vmm/intel sys x86/x86 X-SVN-Commit-Revision: 323431 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-11@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for only the 11-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 11 Sep 2017 08:48:38 -0000 
Author: kib Date: Mon Sep 11 08:48:36 2017 New Revision: 323431 URL: https://svnweb.freebsd.org/changeset/base/323431 Log: MFC r322762, r322799, r322832, r322833: Make WRFSBASE and WRGSBASE instructions functional. Bump stable/11 __FreeBSD_version. Modified: stable/11/sys/amd64/amd64/cpu_switch.S stable/11/sys/amd64/amd64/exception.S stable/11/sys/amd64/amd64/machdep.c stable/11/sys/amd64/amd64/ptrace_machdep.c stable/11/sys/amd64/amd64/sys_machdep.c stable/11/sys/amd64/amd64/vm_machdep.c stable/11/sys/amd64/include/asmacros.h stable/11/sys/amd64/include/pcb.h stable/11/sys/amd64/vmm/intel/vmx_msr.c stable/11/sys/sys/param.h stable/11/sys/x86/x86/identcpu.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/amd64/amd64/cpu_switch.S ============================================================================== --- stable/11/sys/amd64/amd64/cpu_switch.S Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/amd64/cpu_switch.S Mon Sep 11 08:48:36 2017 (r323431) @@ -87,7 +87,6 @@ END(cpu_throw) ENTRY(cpu_switch) /* Switch to new thread. First, save context. 
*/ movq TD_PCB(%rdi),%r8 - orl $PCB_FULL_IRET,PCB_FLAGS(%r8) movq (%rsp),%rax /* Hardware registers */ movq %r15,PCB_R15(%r8) @@ -99,6 +98,30 @@ ENTRY(cpu_switch) movq %rbx,PCB_RBX(%r8) movq %rax,PCB_RIP(%r8) + testl $PCB_FULL_IRET,PCB_FLAGS(%r8) + jnz 2f + orl $PCB_FULL_IRET,PCB_FLAGS(%r8) + testl $TDP_KTHREAD,TD_PFLAGS(%rdi) + jnz 2f + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + movl %fs,%eax + cmpl $KUF32SEL,%eax + jne 1f + rdfsbaseq %rax + movq %rax,PCB_FSBASE(%r8) +1: movl %gs,%eax + cmpl $KUG32SEL,%eax + jne 2f + movq %rdx,%r12 + movl $MSR_KGSBASE,%ecx /* Read user gs base */ + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%r8) + movq %r12,%rdx + +2: testl $PCB_DBREGS,PCB_FLAGS(%r8) jnz store_dr /* static predict not taken */ done_store_dr: Modified: stable/11/sys/amd64/amd64/exception.S ============================================================================== --- stable/11/sys/amd64/amd64/exception.S Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/amd64/exception.S Mon Sep 11 08:48:36 2017 (r323431) @@ -187,12 +187,13 @@ alltraps_testi: jz alltraps_pushregs_no_rdi sti alltraps_pushregs_no_rdi: - movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) + movq %rax,TF_RAX(%rsp) +alltraps_pushregs_no_rax: + movq %rsi,TF_RSI(%rsp) movq %rcx,TF_RCX(%rsp) movq %r8,TF_R8(%rsp) movq %r9,TF_R9(%rsp) - movq %rax,TF_RAX(%rsp) movq %rbx,TF_RBX(%rsp) movq %rbp,TF_RBP(%rsp) movq %r10,TF_R10(%rsp) @@ -326,31 +327,53 @@ IDTVEC(prot) prot_addrf: movq $0,TF_ADDR(%rsp) movq %rdi,TF_RDI(%rsp) /* free up a GP register */ + movq %rax,TF_RAX(%rsp) + movq %rdx,TF_RDX(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) leaq doreti_iret(%rip),%rdi cmpq %rdi,TF_RIP(%rsp) - je 1f /* kernel but with user gsbase!! */ + je 5f /* kernel but with user gsbase!! */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? 
*/ - jz 2f /* already running with kernel GS.base */ -1: swapgs -2: movq PCPU(CURPCB),%rdi - orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ - movw %fs,TF_FS(%rsp) - movw %gs,TF_GS(%rsp) + jz 6f /* already running with kernel GS.base */ + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 1f + rdfsbaseq %rax +1: cmpw $KUG32SEL,TF_GS(%rsp) + jne 2f + rdgsbaseq %rdx +2: swapgs + movq PCPU(CURPCB),%rdi + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 4f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 3f + movq %rax,PCB_FSBASE(%rdi) +3: cmpw $KUG32SEL,TF_GS(%rsp) + jne 4f + movq %rdx,PCB_GSBASE(%rdi) +4: orl $PCB_FULL_IRET,PCB_FLAGS(%rdi) /* always full iret from GPF */ movw %es,TF_ES(%rsp) movw %ds,TF_DS(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) - jz alltraps_pushregs_no_rdi + jz alltraps_pushregs_no_rax sti - jmp alltraps_pushregs_no_rdi + jmp alltraps_pushregs_no_rax +5: swapgs +6: movq PCPU(CURPCB),%rdi + jmp 4b + /* * Fast syscall entry point. We enter here with just our new %cs/%ss set, * and the new privilege level. We are still running on the old user stack * pointer. We have to juggle a few things around to find our stack etc. * swapgs gives us access to our PCPU space only. * - * We do not support invoking this from a custom %cs or %ss (e.g. using - * entries from an LDT). + * We do not support invoking this from custom segment registers, + * esp. %cs, %ss, %fs, %gs, e.g. using entries from an LDT. 
*/ IDTVEC(fast_syscall) swapgs @@ -503,6 +526,23 @@ IDTVEC(nmi) nmi_fromuserspace: incl %ebx swapgs + testb $CPUID_STDEXT_FSGSBASE,cpu_stdext_feature(%rip) + jz 2f + movq PCPU(CURPCB),%rdi + testq %rdi,%rdi + jz 2f + cmpw $KUF32SEL,TF_FS(%rsp) + jne 1f + rdfsbaseq %rax + movq %rax,PCB_FSBASE(%rdi) +1: cmpw $KUG32SEL,TF_GS(%rsp) + jne 2f + movl $MSR_KGSBASE,%ecx + rdmsr + shlq $32,%rdx + orq %rdx,%rax + movq %rax,PCB_GSBASE(%rdi) +2: /* Note: this label is also used by ddb and gdb: */ nmi_calltrap: FAKE_MCOUNT(TF_RIP(%rsp)) @@ -705,6 +745,7 @@ doreti_exit: jz ld_regs testl $PCB_FULL_IRET,PCB_FLAGS(%r8) jz ld_regs + andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) testl $TF_HASSEGS,TF_FLAGS(%rsp) je set_segs Modified: stable/11/sys/amd64/amd64/machdep.c ============================================================================== --- stable/11/sys/amd64/amd64/machdep.c Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/amd64/machdep.c Mon Sep 11 08:48:36 2017 (r323431) @@ -379,6 +379,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); + update_pcb_bases(pcb); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, @@ -449,7 +450,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; - set_pcb_flags(pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -555,6 +555,7 @@ sys_sigreturn(td, uap) return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); + update_pcb_bases(pcb); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; @@ -566,7 +567,6 @@ sys_sigreturn(td, uap) #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); - set_pcb_flags(pcb, PCB_FULL_IRET); return (EJUSTRETURN); 
} @@ -594,11 +594,11 @@ exec_setregs(struct thread *td, struct image_params *i else mtx_unlock(&dt_lock); + update_pcb_bases(pcb); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; - set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; @@ -2142,6 +2142,7 @@ get_mcontext(struct thread *td, mcontext_t *mcp, int f mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); + update_pcb_bases(pcb); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; @@ -2212,11 +2213,11 @@ set_mcontext(struct thread *td, mcontext_t *mcp) tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } + set_pcb_flags(pcb, PCB_FULL_IRET); if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } - set_pcb_flags(pcb, PCB_FULL_IRET); return (0); } @@ -2485,6 +2486,71 @@ user_dbreg_trap(void) * None of the breakpoints are in user space. */ return 0; +} + +/* + * The pcb_flags is only modified by current thread, or by other threads + * when current thread is stopped. However, current thread may change it + * from the interrupt context in cpu_switch(), or in the trap handler. + * When we read-modify-write pcb_flags from C sources, compiler may generate + * code that is not atomic regarding the interrupt handler. If a trap or + * interrupt happens and any flag is modified from the handler, it can be + * clobbered with the cached value later. Therefore, we implement setting + * and clearing flags with single-instruction functions, which do not race + * with possible modification of the flags from the trap or interrupt context, + * because traps and interrupts are executed only on instruction boundary. 
+ */ +void +set_pcb_flags_raw(struct pcb *pcb, const u_int flags) +{ + + __asm __volatile("orl %1,%0" + : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) + : "cc", "memory"); + +} + +/* + * The support for RDFSBASE, WRFSBASE and similar instructions for %gs + * base requires that the kernel saves MSR_FSBASE and MSR_{K,}GSBASE into + * pcb if user space modified the bases. We must save on the context + * switch or if the return to usermode happens through the doreti. + * + * Tracking of both events is performed by the pcb flag PCB_FULL_IRET, + * which has the consequence that the base MSRs must be saved each time + * the PCB_FULL_IRET flag is set. We disable interrupts to sync with + * context switches. + */ +void +set_pcb_flags(struct pcb *pcb, const u_int flags) +{ + register_t r; + + if (curpcb == pcb && + (flags & PCB_FULL_IRET) != 0 && + (pcb->pcb_flags & PCB_FULL_IRET) == 0 && + (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0) { + r = intr_disable(); + if ((pcb->pcb_flags & PCB_FULL_IRET) == 0) { + if (rfs() == _ufssel) + pcb->pcb_fsbase = rdfsbase(); + if (rgs() == _ugssel) + pcb->pcb_gsbase = rdmsr(MSR_KGSBASE); + } + set_pcb_flags_raw(pcb, flags); + intr_restore(r); + } else { + set_pcb_flags_raw(pcb, flags); + } +} + +void +clear_pcb_flags(struct pcb *pcb, const u_int flags) +{ + + __asm __volatile("andl %1,%0" + : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) + : "cc", "memory"); } #ifdef KDB Modified: stable/11/sys/amd64/amd64/ptrace_machdep.c ============================================================================== --- stable/11/sys/amd64/amd64/ptrace_machdep.c Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/amd64/ptrace_machdep.c Mon Sep 11 08:48:36 2017 (r323431) @@ -117,15 +117,17 @@ cpu_ptrace_xstate(struct thread *td, int req, void *ad static void cpu_ptrace_setbase(struct thread *td, int req, register_t r) { + struct pcb *pcb; + pcb = td->td_pcb; + set_pcb_flags(pcb, PCB_FULL_IRET); if (req == PT_SETFSBASE) { 
- td->td_pcb->pcb_fsbase = r; + pcb->pcb_fsbase = r; td->td_frame->tf_fs = _ufssel; } else { - td->td_pcb->pcb_gsbase = r; + pcb->pcb_gsbase = r; td->td_frame->tf_gs = _ugssel; } - set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } #ifdef COMPAT_FREEBSD32 @@ -136,6 +138,7 @@ static int cpu32_ptrace(struct thread *td, int req, void *addr, int data) { struct savefpu *fpstate; + struct pcb *pcb; uint32_t r; int error; @@ -167,8 +170,10 @@ cpu32_ptrace(struct thread *td, int req, void *addr, i error = EINVAL; break; } - r = req == PT_GETFSBASE ? td->td_pcb->pcb_fsbase : - td->td_pcb->pcb_gsbase; + pcb = td->td_pcb; + if (td == curthread) + update_pcb_bases(pcb); + r = req == PT_GETFSBASE ? pcb->pcb_fsbase : pcb->pcb_gsbase; error = copyout(&r, addr, sizeof(r)); break; @@ -197,6 +202,7 @@ int cpu_ptrace(struct thread *td, int req, void *addr, int data) { register_t *r, rv; + struct pcb *pcb; int error; #ifdef COMPAT_FREEBSD32 @@ -221,8 +227,10 @@ cpu_ptrace(struct thread *td, int req, void *addr, int case PT_GETFSBASE: case PT_GETGSBASE: - r = req == PT_GETFSBASE ? &td->td_pcb->pcb_fsbase : - &td->td_pcb->pcb_gsbase; + pcb = td->td_pcb; + if (td == curthread) + update_pcb_bases(pcb); + r = req == PT_GETFSBASE ? 
&pcb->pcb_fsbase : &pcb->pcb_gsbase; error = copyout(r, addr, sizeof(*r)); break; Modified: stable/11/sys/amd64/amd64/sys_machdep.c ============================================================================== --- stable/11/sys/amd64/amd64/sys_machdep.c Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/amd64/sys_machdep.c Mon Sep 11 08:48:36 2017 (r323431) @@ -256,39 +256,45 @@ sysarch(td, uap) error = amd64_set_ioperm(td, &iargs); break; case I386_GET_FSBASE: + update_pcb_bases(pcb); i386base = pcb->pcb_fsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_FSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_fsbase = i386base; td->td_frame->tf_fs = _ufssel; update_gdt_fsbase(td, i386base); } break; case I386_GET_GSBASE: + update_pcb_bases(pcb); i386base = pcb->pcb_gsbase; error = copyout(&i386base, uap->parms, sizeof(i386base)); break; case I386_SET_GSBASE: error = copyin(uap->parms, &i386base, sizeof(i386base)); if (!error) { + set_pcb_flags(pcb, PCB_FULL_IRET); pcb->pcb_gsbase = i386base; td->td_frame->tf_gs = _ugssel; update_gdt_gsbase(td, i386base); } break; case AMD64_GET_FSBASE: - error = copyout(&pcb->pcb_fsbase, uap->parms, sizeof(pcb->pcb_fsbase)); + update_pcb_bases(pcb); + error = copyout(&pcb->pcb_fsbase, uap->parms, + sizeof(pcb->pcb_fsbase)); break; case AMD64_SET_FSBASE: error = copyin(uap->parms, &a64base, sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - pcb->pcb_fsbase = a64base; set_pcb_flags(pcb, PCB_FULL_IRET); + pcb->pcb_fsbase = a64base; td->td_frame->tf_fs = _ufssel; } else error = EINVAL; @@ -296,15 +302,17 @@ sysarch(td, uap) break; case AMD64_GET_GSBASE: - error = copyout(&pcb->pcb_gsbase, uap->parms, sizeof(pcb->pcb_gsbase)); + update_pcb_bases(pcb); + error = copyout(&pcb->pcb_gsbase, uap->parms, + sizeof(pcb->pcb_gsbase)); break; case AMD64_SET_GSBASE: error = copyin(uap->parms, &a64base, 
sizeof(a64base)); if (!error) { if (a64base < VM_MAXUSER_ADDRESS) { - pcb->pcb_gsbase = a64base; set_pcb_flags(pcb, PCB_FULL_IRET); + pcb->pcb_gsbase = a64base; td->td_frame->tf_gs = _ugssel; } else error = EINVAL; Modified: stable/11/sys/amd64/amd64/vm_machdep.c ============================================================================== --- stable/11/sys/amd64/amd64/vm_machdep.c Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/amd64/vm_machdep.c Mon Sep 11 08:48:36 2017 (r323431) @@ -176,6 +176,7 @@ cpu_fork(td1, p2, td2, flags) /* Ensure that td1's pcb is up to date. */ fpuexit(td1); + update_pcb_bases(td1->td_pcb); /* Point the pcb to the top of the stack */ pcb2 = get_pcb_td(td2); @@ -242,7 +243,7 @@ cpu_fork(td1, p2, td2, flags) pcb2->pcb_tssp = NULL; /* New segment registers. */ - set_pcb_flags(pcb2, PCB_FULL_IRET); + set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* Copy the LDT, if necessary. */ mdp1 = &td1->td_proc->p_md; @@ -437,13 +438,14 @@ cpu_copy_thread(struct thread *td, struct thread *td0) * Those not loaded individually below get their default * values here. */ + update_pcb_bases(td0->td_pcb); bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); clear_pcb_flags(pcb2, PCB_FPUINITDONE | PCB_USERFPUINITDONE | PCB_KERNFPU); pcb2->pcb_save = get_pcb_user_save_pcb(pcb2); bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save, cpu_max_ext_state_size); - set_pcb_flags(pcb2, PCB_FULL_IRET); + set_pcb_flags_raw(pcb2, PCB_FULL_IRET); /* * Create a new fresh stack for the new thread. Modified: stable/11/sys/amd64/include/asmacros.h ============================================================================== --- stable/11/sys/amd64/include/asmacros.h Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/include/asmacros.h Mon Sep 11 08:48:36 2017 (r323431) @@ -177,7 +177,12 @@ movw %es,TF_ES(%rsp) ; \ movw %ds,TF_DS(%rsp) ; \ movl $TF_HASSEGS,TF_FLAGS(%rsp) ; \ - cld + cld ; \ + testb $SEL_RPL_MASK,TF_CS(%rsp) ; /* come from kernel ? 
*/ \ + jz 2f ; /* yes, leave PCB_FULL_IRET alone */ \ + movq PCPU(CURPCB),%r8 ; \ + andl $~PCB_FULL_IRET,PCB_FLAGS(%r8) ; \ +2: #define POP_FRAME \ movq TF_RDI(%rsp),%rdi ; \ Modified: stable/11/sys/amd64/include/pcb.h ============================================================================== --- stable/11/sys/amd64/include/pcb.h Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/include/pcb.h Mon Sep 11 08:48:36 2017 (r323431) @@ -119,40 +119,15 @@ struct susppcb { #ifdef _KERNEL struct trapframe; -/* - * The pcb_flags is only modified by current thread, or by other threads - * when current thread is stopped. However, current thread may change it - * from the interrupt context in cpu_switch(), or in the trap handler. - * When we read-modify-write pcb_flags from C sources, compiler may generate - * code that is not atomic regarding the interrupt handler. If a trap or - * interrupt happens and any flag is modified from the handler, it can be - * clobbered with the cached value later. Therefore, we implement setting - * and clearing flags with single-instruction functions, which do not race - * with possible modification of the flags from the trap or interrupt context, - * because traps and interrupts are executed only on instruction boundary. 
- */ -static __inline void -set_pcb_flags(struct pcb *pcb, const u_int flags) -{ - - __asm __volatile("orl %1,%0" - : "=m" (pcb->pcb_flags) : "ir" (flags), "m" (pcb->pcb_flags) - : "cc"); -} - -static __inline void -clear_pcb_flags(struct pcb *pcb, const u_int flags) -{ - - __asm __volatile("andl %1,%0" - : "=m" (pcb->pcb_flags) : "ir" (~flags), "m" (pcb->pcb_flags) - : "cc"); -} - +void clear_pcb_flags(struct pcb *pcb, const u_int flags); void makectx(struct trapframe *, struct pcb *); +void set_pcb_flags(struct pcb *pcb, const u_int flags); +void set_pcb_flags_raw(struct pcb *pcb, const u_int flags); int savectx(struct pcb *) __returns_twice; void resumectx(struct pcb *); +/* Ensure that pcb_gsbase and pcb_fsbase are up to date */ +#define update_pcb_bases(pcb) set_pcb_flags((pcb), PCB_FULL_IRET) #endif #endif /* _AMD64_PCB_H_ */ Modified: stable/11/sys/amd64/vmm/intel/vmx_msr.c ============================================================================== --- stable/11/sys/amd64/vmm/intel/vmx_msr.c Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/amd64/vmm/intel/vmx_msr.c Mon Sep 11 08:48:36 2017 (r323431) @@ -31,10 +31,12 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include +#include #include #include @@ -356,7 +358,8 @@ vmx_msr_guest_enter(struct vmx *vmx, int vcpuid) { uint64_t *guest_msrs = vmx->guest_msrs[vcpuid]; - /* Save host MSRs (if any) and restore guest MSRs */ + /* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */ + update_pcb_bases(curpcb); wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]); wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]); wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]); Modified: stable/11/sys/sys/param.h ============================================================================== --- stable/11/sys/sys/param.h Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/sys/param.h Mon Sep 11 08:48:36 2017 (r323431) @@ -58,7 +58,7 @@ * in the range 5 to 9. 
*/ #undef __FreeBSD_version -#define __FreeBSD_version 1101502 /* Master, propagated to newvers */ +#define __FreeBSD_version 1101503 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, @@ -84,6 +84,8 @@ #define P_OSREL_SHUTDOWN_ENOTCONN 1100077 #define P_OSREL_MAP_GUARD 1200035 #define P_OSREL_MAP_GUARD_11 1101501 +#define P_OSREL_WRFSBASE 1200041 +#define P_OSREL_WRFSBASE_11 1101503 #define P_OSREL_MAJOR(x) ((x) / 100000) #endif Modified: stable/11/sys/x86/x86/identcpu.c ============================================================================== --- stable/11/sys/x86/x86/identcpu.c Mon Sep 11 08:18:56 2017 (r323430) +++ stable/11/sys/x86/x86/identcpu.c Mon Sep 11 08:48:36 2017 (r323431) @@ -1423,18 +1423,15 @@ finishidentcpu(void) cpu_stdext_feature = regs[1]; /* - * Some hypervisors fail to filter out unsupported - * extended features. For now, disable the + * Some hypervisors failed to filter out unsupported + * extended features. Allow disabling the * extensions, activation of which requires setting a * bit in CR4, and which VM monitors do not support. */ - if (cpu_feature2 & CPUID2_HV) { - cpu_stdext_disable = CPUID_STDEXT_FSGSBASE | - CPUID_STDEXT_SMEP; - } else - cpu_stdext_disable = 0; + cpu_stdext_disable = 0; TUNABLE_INT_FETCH("hw.cpu_stdext_disable", &cpu_stdext_disable); cpu_stdext_feature &= ~cpu_stdext_disable; + cpu_stdext_feature2 = regs[2]; }