From owner-svn-src-stable@freebsd.org Wed Jun 20 18:51:40 2018 Return-Path: Delivered-To: svn-src-stable@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 6EDD21003FE4; Wed, 20 Jun 2018 18:51:40 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 2392F6D33A; Wed, 20 Jun 2018 18:51:40 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 062B6263C3; Wed, 20 Jun 2018 18:51:40 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w5KIpdZG036763; Wed, 20 Jun 2018 18:51:39 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w5KIpdjS036760; Wed, 20 Jun 2018 18:51:39 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201806201851.w5KIpdjS036760@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Wed, 20 Jun 2018 18:51:39 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r335455 - in stable/10/sys: amd64/amd64 i386/i386 i386/isa X-SVN-Group: stable-10 X-SVN-Commit-Author: kib X-SVN-Commit-Paths: in stable/10/sys: amd64/amd64 i386/i386 i386/isa X-SVN-Commit-Revision: 335455 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.26 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 20 Jun 2018 18:51:40 -0000 Author: kib Date: Wed Jun 20 18:51:38 2018 New Revision: 335455 URL: https://svnweb.freebsd.org/changeset/base/335455 Log: MFC r335072, r335089, r335131, r335132: Enable eager FPU context switch on i386 and amd64. CVE: CVE-2018-3665 Tested by: emaste (smoke boot) Modified: stable/10/sys/amd64/amd64/cpu_switch.S stable/10/sys/amd64/amd64/fpu.c stable/10/sys/i386/i386/swtch.s stable/10/sys/i386/isa/npx.c Directory Properties: stable/10/ (props changed) Modified: stable/10/sys/amd64/amd64/cpu_switch.S ============================================================================== --- stable/10/sys/amd64/amd64/cpu_switch.S Wed Jun 20 18:43:17 2018 (r335454) +++ stable/10/sys/amd64/amd64/cpu_switch.S Wed Jun 20 18:51:38 2018 (r335455) @@ -111,10 +111,10 @@ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) - jne 3f + jne 2f movq PCB_SAVEFPU(%r8),%r8 clts - cmpl $0,use_xsave + cmpl $0,use_xsave(%rip) jne 1f fxsave (%r8) jmp 2f @@ -126,12 +126,7 @@ ctx_switch_xsave: /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */ xsave (%r8) movq %rcx,%rdx -2: smsw %ax - orb $CR0_TS,%al - lmsw %ax - xorl %eax,%eax - movq %rax,PCPU(FPCURTHREAD) -3: +2: /* Save is done. Now fire up new thread. Leave old vmspace. */ movq TD_PCB(%rsi),%r8 @@ -255,6 +250,8 @@ done_load_dr: movq PCB_RBX(%r8),%rbx movq PCB_RIP(%r8),%rax movq %rax,(%rsp) + movq PCPU(CURTHREAD),%rdi + call fpu_activate_sw ret /* Modified: stable/10/sys/amd64/amd64/fpu.c ============================================================================== --- stable/10/sys/amd64/amd64/fpu.c Wed Jun 20 18:43:17 2018 (r335454) +++ stable/10/sys/amd64/amd64/fpu.c Wed Jun 20 18:51:38 2018 (r335455) @@ -139,6 +139,11 @@ static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); +int lazy_fpu_switch = 0; +SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, + &lazy_fpu_switch, 0, + "Lazily load FPU context after context switch"); + int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static uma_zone_t fpu_save_area_zone; @@ -204,6 +209,7 @@ fpuinit_bsp1(void) u_int cp[4]; uint64_t xsave_mask_user; + TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); if ((cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); @@ -611,6 +617,45 @@ fputrap_sse(void) return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } +static void +restore_fpu_curthread(struct thread *td) +{ + struct pcb *pcb; + + /* + * Record new context early in case frstor causes a trap. + */ + PCPU_SET(fpcurthread, td); + + stop_emulating(); + fpu_clean_state(); + pcb = td->td_pcb; + + if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { + /* + * This is the first time this thread has used the FPU or + * the PCB doesn't contain a clean FPU state. Explicitly + * load an initial state. + * + * We prefer to restore the state from the actual save + * area in PCB instead of directly loading from + * fpu_initialstate, to ignite the XSAVEOPT + * tracking engine. + */ + bcopy(fpu_initialstate, pcb->pcb_save, + cpu_max_ext_state_size); + fpurestore(pcb->pcb_save); + if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) + fldcw(pcb->pcb_initial_fpucw); + if (PCB_USER_FPU(pcb)) + set_pcb_flags(pcb, PCB_FPUINITDONE | + PCB_USERFPUINITDONE); + else + set_pcb_flags(pcb, PCB_FPUINITDONE); + } else + fpurestore(pcb->pcb_save); +} + /* * Device Not Available (DNA, #NM) exception handler. * @@ -621,7 +666,9 @@ fputrap_sse(void) void fpudna(void) { + struct thread *td; + td = curthread; /* * This handler is entered with interrupts enabled, so context * switches may occur before critical_enter() is executed. If @@ -633,49 +680,38 @@ fpudna(void) */ critical_enter(); - if (PCPU_GET(fpcurthread) == curthread) { - printf("fpudna: fpcurthread == curthread\n"); + if (__predict_false(PCPU_GET(fpcurthread) == td)) { + /* + * Some virtual machines seems to set %cr0.TS at + * arbitrary moments. Silently clear the TS bit + * regardless of the eager/lazy FPU context switch + * mode. + */ stop_emulating(); - critical_exit(); - return; + } else { + if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { + panic( + "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", + PCPU_GET(fpcurthread), + PCPU_GET(fpcurthread)->td_tid, td, td->td_tid); + } + restore_fpu_curthread(td); } - if (PCPU_GET(fpcurthread) != NULL) { - panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", - PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid, - curthread, curthread->td_tid); - } - stop_emulating(); - /* - * Record new context early in case frstor causes a trap. - */ - PCPU_SET(fpcurthread, curthread); + critical_exit(); +} - fpu_clean_state(); +void fpu_activate_sw(struct thread *td); /* Called from the context switch */ +void +fpu_activate_sw(struct thread *td) +{ - if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) { - /* - * This is the first time this thread has used the FPU or - * the PCB doesn't contain a clean FPU state. Explicitly - * load an initial state. - * - * We prefer to restore the state from the actual save - * area in PCB instead of directly loading from - * fpu_initialstate, to ignite the XSAVEOPT - * tracking engine. - */ - bcopy(fpu_initialstate, curpcb->pcb_save, - cpu_max_ext_state_size); - fpurestore(curpcb->pcb_save); - if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__) - fldcw(curpcb->pcb_initial_fpucw); - if (PCB_USER_FPU(curpcb)) - set_pcb_flags(curpcb, - PCB_FPUINITDONE | PCB_USERFPUINITDONE); - else - set_pcb_flags(curpcb, PCB_FPUINITDONE); - } else - fpurestore(curpcb->pcb_save); - critical_exit(); + if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || + !PCB_USER_FPU(td->td_pcb)) { + PCPU_SET(fpcurthread, NULL); + start_emulating(); + } else if (PCPU_GET(fpcurthread) != td) { + restore_fpu_curthread(td); + } } void Modified: stable/10/sys/i386/i386/swtch.s ============================================================================== --- stable/10/sys/i386/i386/swtch.s Wed Jun 20 18:43:17 2018 (r335454) +++ stable/10/sys/i386/i386/swtch.s Wed Jun 20 18:51:38 2018 (r335455) @@ -314,6 +314,12 @@ sw1: cpu_switch_load_gs: mov PCB_GS(%edx),%gs + pushl %edx + pushl PCPU(CURTHREAD) + call npxswitch + popl %edx + popl %edx + /* Test if debug registers should be restored. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) jz 1f Modified: stable/10/sys/i386/isa/npx.c ============================================================================== --- stable/10/sys/i386/isa/npx.c Wed Jun 20 18:43:17 2018 (r335454) +++ stable/10/sys/i386/isa/npx.c Wed Jun 20 18:51:38 2018 (r335455) @@ -219,6 +219,11 @@ int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); +int lazy_fpu_switch = 0; +SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, + &lazy_fpu_switch, 0, + "Lazily load FPU context after context switch"); + #ifdef CPU_ENABLE_SSE int use_xsave; uint64_t xsave_mask; @@ -360,6 +365,7 @@ npxinit_bsp1(void) u_int cp[4]; uint64_t xsave_mask_user; + TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); @@ -840,49 +846,22 @@ npxtrap_sse(void) } #endif -/* - * Implement device not available (DNA) exception - * - * It would be better to switch FP context here (if curthread != fpcurthread) - * and not necessarily for every context switch, but it is too hard to - * access foreign pcb's. - */ - -static int err_count = 0; - -int -npxdna(void) +static void +restore_npx_curthread(struct thread *td, struct pcb *pcb) { - if (!hw_float) - return (0); - critical_enter(); - if (PCPU_GET(fpcurthread) == curthread) { - printf("npxdna: fpcurthread == curthread %d times\n", - ++err_count); - stop_emulating(); - critical_exit(); - return (1); - } - if (PCPU_GET(fpcurthread) != NULL) { - printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", - PCPU_GET(fpcurthread), - PCPU_GET(fpcurthread)->td_proc->p_pid, - curthread, curthread->td_proc->p_pid); - panic("npxdna"); - } - stop_emulating(); /* * Record new context early in case frstor causes a trap. */ - PCPU_SET(fpcurthread, curthread); + PCPU_SET(fpcurthread, td); + stop_emulating(); #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpu_clean_state(); #endif - if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) { + if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly @@ -893,18 +872,54 @@ npxdna(void) * npx_initialstate, to ignite the XSAVEOPT * tracking engine. */ - bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); - fpurstor(curpcb->pcb_save); - if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) - fldcw(curpcb->pcb_initial_npxcw); - curpcb->pcb_flags |= PCB_NPXINITDONE; - if (PCB_USER_FPU(curpcb)) - curpcb->pcb_flags |= PCB_NPXUSERINITDONE; + bcopy(npx_initialstate, pcb->pcb_save, cpu_max_ext_state_size); + fpurstor(pcb->pcb_save); + if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) + fldcw(pcb->pcb_initial_npxcw); + pcb->pcb_flags |= PCB_NPXINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { - fpurstor(curpcb->pcb_save); + fpurstor(pcb->pcb_save); } - critical_exit(); +} +/* + * Implement device not available (DNA) exception + * + * It would be better to switch FP context here (if curthread != fpcurthread) + * and not necessarily for every context switch, but it is too hard to + * access foreign pcb's. + */ +int +npxdna(void) +{ + struct thread *td; + + if (!hw_float) + return (0); + td = curthread; + critical_enter(); + if (__predict_false(PCPU_GET(fpcurthread) == td)) { + /* + * Some virtual machines seems to set %cr0.TS at + * arbitrary moments. Silently clear the TS bit + * regardless of the eager/lazy FPU context switch + * mode. + */ + stop_emulating(); + } else { + if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { + printf( + "npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", + PCPU_GET(fpcurthread), + PCPU_GET(fpcurthread)->td_proc->p_pid, + td, td->td_proc->p_pid); + panic("npxdna"); + } + restore_npx_curthread(td, td->td_pcb); + } + critical_exit(); return (1); } @@ -928,8 +943,20 @@ npxsave(addr) else #endif fpusave(addr); - start_emulating(); - PCPU_SET(fpcurthread, NULL); +} + +void npxswitch(struct thread *td, struct pcb *pcb); +void +npxswitch(struct thread *td, struct pcb *pcb) +{ + + if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || + !PCB_USER_FPU(pcb)) { + start_emulating(); + PCPU_SET(fpcurthread, NULL); + } else if (PCPU_GET(fpcurthread) != td) { + restore_npx_curthread(td, pcb); + } } /*