From owner-svn-src-releng@freebsd.org Fri Jun 15 13:21:39 2018 Return-Path: Delivered-To: svn-src-releng@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 70B4F10038BD; Fri, 15 Jun 2018 13:21:39 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 264C66CF7C; Fri, 15 Jun 2018 13:21:39 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 08FAE17DF0; Fri, 15 Jun 2018 13:21:39 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w5FDLcTY015602; Fri, 15 Jun 2018 13:21:38 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w5FDLcRt015598; Fri, 15 Jun 2018 13:21:38 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201806151321.w5FDLcRt015598@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Fri, 15 Jun 2018 13:21:38 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-releng@freebsd.org Subject: svn commit: r335196 - in releng/11.2/sys: amd64/amd64 i386/i386 i386/isa X-SVN-Group: releng X-SVN-Commit-Author: kib X-SVN-Commit-Paths: in releng/11.2/sys: amd64/amd64 i386/i386 i386/isa X-SVN-Commit-Revision: 335196 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-releng@freebsd.org X-Mailman-Version: 2.1.26 Precedence: list List-Id: SVN commit messages for the release engineering / security commits to the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 15 Jun 2018 13:21:39 -0000 Author: kib Date: Fri Jun 15 13:21:37 2018 New Revision: 335196 URL: https://svnweb.freebsd.org/changeset/base/335196 Log: MFC rr335072, r335089: Enable eager FPU context switch on i386 and amd64. CVE: CVE-2018-3665 MFC r335131 Remove printf() in #NM handler. MFC r335132: Reorganize code flow in fpudna()/npxdna(). Approved by: re (gjb) Modified: releng/11.2/sys/amd64/amd64/cpu_switch.S releng/11.2/sys/amd64/amd64/fpu.c releng/11.2/sys/i386/i386/swtch.s releng/11.2/sys/i386/isa/npx.c Directory Properties: releng/11.2/ (props changed) Modified: releng/11.2/sys/amd64/amd64/cpu_switch.S ============================================================================== --- releng/11.2/sys/amd64/amd64/cpu_switch.S Fri Jun 15 13:14:45 2018 (r335195) +++ releng/11.2/sys/amd64/amd64/cpu_switch.S Fri Jun 15 13:21:37 2018 (r335196) @@ -128,10 +128,10 @@ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) - jne 3f + jne 2f movq PCB_SAVEFPU(%r8),%r8 clts - cmpl $0,use_xsave + cmpl $0,use_xsave(%rip) jne 1f fxsave (%r8) jmp 2f @@ -143,12 +143,7 @@ ctx_switch_xsave: /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */ xsave (%r8) movq %rcx,%rdx -2: smsw %ax - orb $CR0_TS,%al - lmsw %ax - xorl %eax,%eax - movq %rax,PCPU(FPCURTHREAD) -3: +2: /* Save is done. Now fire up new thread. Leave old vmspace. */ movq %rsi,%r12 movq %rdi,%r13 @@ -235,6 +230,8 @@ done_load_dr: movq PCB_RBX(%r8),%rbx movq PCB_RIP(%r8),%rax movq %rax,(%rsp) + movq PCPU(CURTHREAD),%rdi + call fpu_activate_sw ret /* Modified: releng/11.2/sys/amd64/amd64/fpu.c ============================================================================== --- releng/11.2/sys/amd64/amd64/fpu.c Fri Jun 15 13:14:45 2018 (r335195) +++ releng/11.2/sys/amd64/amd64/fpu.c Fri Jun 15 13:21:37 2018 (r335196) @@ -139,6 +139,11 @@ static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); +int lazy_fpu_switch = 0; +SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, + &lazy_fpu_switch, 0, + "Lazily load FPU context after context switch"); + int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static uma_zone_t fpu_save_area_zone; @@ -204,6 +209,7 @@ fpuinit_bsp1(void) u_int cp[4]; uint64_t xsave_mask_user; + TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); if ((cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); @@ -612,6 +618,45 @@ fputrap_sse(void) return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } +static void +restore_fpu_curthread(struct thread *td) +{ + struct pcb *pcb; + + /* + * Record new context early in case frstor causes a trap. + */ + PCPU_SET(fpcurthread, td); + + stop_emulating(); + fpu_clean_state(); + pcb = td->td_pcb; + + if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { + /* + * This is the first time this thread has used the FPU or + * the PCB doesn't contain a clean FPU state. Explicitly + * load an initial state. + * + * We prefer to restore the state from the actual save + * area in PCB instead of directly loading from + * fpu_initialstate, to ignite the XSAVEOPT + * tracking engine. + */ + bcopy(fpu_initialstate, pcb->pcb_save, + cpu_max_ext_state_size); + fpurestore(pcb->pcb_save); + if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) + fldcw(pcb->pcb_initial_fpucw); + if (PCB_USER_FPU(pcb)) + set_pcb_flags(pcb, PCB_FPUINITDONE | + PCB_USERFPUINITDONE); + else + set_pcb_flags(pcb, PCB_FPUINITDONE); + } else + fpurestore(pcb->pcb_save); +} + /* * Device Not Available (DNA, #NM) exception handler. * @@ -622,7 +667,9 @@ fputrap_sse(void) void fpudna(void) { + struct thread *td; + td = curthread; /* * This handler is entered with interrupts enabled, so context * switches may occur before critical_enter() is executed. If @@ -636,49 +683,38 @@ fpudna(void) KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); - if (PCPU_GET(fpcurthread) == curthread) { - printf("fpudna: fpcurthread == curthread\n"); + if (__predict_false(PCPU_GET(fpcurthread) == td)) { + /* + * Some virtual machines seems to set %cr0.TS at + * arbitrary moments. Silently clear the TS bit + * regardless of the eager/lazy FPU context switch + * mode. + */ stop_emulating(); - critical_exit(); - return; + } else { + if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { + panic( + "fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", + PCPU_GET(fpcurthread), + PCPU_GET(fpcurthread)->td_tid, td, td->td_tid); + } + restore_fpu_curthread(td); } - if (PCPU_GET(fpcurthread) != NULL) { - panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", - PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid, - curthread, curthread->td_tid); - } - stop_emulating(); - /* - * Record new context early in case frstor causes a trap. - */ - PCPU_SET(fpcurthread, curthread); + critical_exit(); +} - fpu_clean_state(); +void fpu_activate_sw(struct thread *td); /* Called from the context switch */ +void +fpu_activate_sw(struct thread *td) +{ - if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) { - /* - * This is the first time this thread has used the FPU or - * the PCB doesn't contain a clean FPU state. Explicitly - * load an initial state. - * - * We prefer to restore the state from the actual save - * area in PCB instead of directly loading from - * fpu_initialstate, to ignite the XSAVEOPT - * tracking engine. - */ - bcopy(fpu_initialstate, curpcb->pcb_save, - cpu_max_ext_state_size); - fpurestore(curpcb->pcb_save); - if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__) - fldcw(curpcb->pcb_initial_fpucw); - if (PCB_USER_FPU(curpcb)) - set_pcb_flags(curpcb, - PCB_FPUINITDONE | PCB_USERFPUINITDONE); - else - set_pcb_flags(curpcb, PCB_FPUINITDONE); - } else - fpurestore(curpcb->pcb_save); - critical_exit(); + if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || + !PCB_USER_FPU(td->td_pcb)) { + PCPU_SET(fpcurthread, NULL); + start_emulating(); + } else if (PCPU_GET(fpcurthread) != td) { + restore_fpu_curthread(td); + } } void Modified: releng/11.2/sys/i386/i386/swtch.s ============================================================================== --- releng/11.2/sys/i386/i386/swtch.s Fri Jun 15 13:14:45 2018 (r335195) +++ releng/11.2/sys/i386/i386/swtch.s Fri Jun 15 13:21:37 2018 (r335196) @@ -296,6 +296,12 @@ sw1: cpu_switch_load_gs: mov PCB_GS(%edx),%gs + pushl %edx + pushl PCPU(CURTHREAD) + call npxswitch + popl %edx + popl %edx + /* Test if debug registers should be restored. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) jz 1f Modified: releng/11.2/sys/i386/isa/npx.c ============================================================================== --- releng/11.2/sys/i386/isa/npx.c Fri Jun 15 13:14:45 2018 (r335195) +++ releng/11.2/sys/i386/isa/npx.c Fri Jun 15 13:21:37 2018 (r335196) @@ -191,6 +191,11 @@ int hw_float; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, "Floating point instructions executed in hardware"); +int lazy_fpu_switch = 0; +SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, + &lazy_fpu_switch, 0, + "Lazily load FPU context after context switch"); + int use_xsave; uint64_t xsave_mask; static uma_zone_t fpu_save_area_zone; @@ -327,6 +332,7 @@ npxinit_bsp1(void) u_int cp[4]; uint64_t xsave_mask_user; + TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); if (cpu_fxsr && (cpu_feature2 & CPUID2_XSAVE) != 0) { use_xsave = 1; TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); @@ -785,47 +791,20 @@ npxtrap_sse(void) return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } -/* - * Implement device not available (DNA) exception - * - * It would be better to switch FP context here (if curthread != fpcurthread) - * and not necessarily for every context switch, but it is too hard to - * access foreign pcb's. - */ - -static int err_count = 0; - -int -npxdna(void) +static void +restore_npx_curthread(struct thread *td, struct pcb *pcb) { - if (!hw_float) - return (0); - critical_enter(); - if (PCPU_GET(fpcurthread) == curthread) { - printf("npxdna: fpcurthread == curthread %d times\n", - ++err_count); - stop_emulating(); - critical_exit(); - return (1); - } - if (PCPU_GET(fpcurthread) != NULL) { - printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", - PCPU_GET(fpcurthread), - PCPU_GET(fpcurthread)->td_proc->p_pid, - curthread, curthread->td_proc->p_pid); - panic("npxdna"); - } - stop_emulating(); /* * Record new context early in case frstor causes a trap. */ - PCPU_SET(fpcurthread, curthread); + PCPU_SET(fpcurthread, td); + stop_emulating(); if (cpu_fxsr) fpu_clean_state(); - if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) { + if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { /* * This is the first time this thread has used the FPU or * the PCB doesn't contain a clean FPU state. Explicitly @@ -836,18 +815,54 @@ npxdna(void) * npx_initialstate, to ignite the XSAVEOPT * tracking engine. */ - bcopy(npx_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); - fpurstor(curpcb->pcb_save); - if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__) - fldcw(curpcb->pcb_initial_npxcw); - curpcb->pcb_flags |= PCB_NPXINITDONE; - if (PCB_USER_FPU(curpcb)) - curpcb->pcb_flags |= PCB_NPXUSERINITDONE; + bcopy(npx_initialstate, pcb->pcb_save, cpu_max_ext_state_size); + fpurstor(pcb->pcb_save); + if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) + fldcw(pcb->pcb_initial_npxcw); + pcb->pcb_flags |= PCB_NPXINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { - fpurstor(curpcb->pcb_save); + fpurstor(pcb->pcb_save); } - critical_exit(); +} +/* + * Implement device not available (DNA) exception + * + * It would be better to switch FP context here (if curthread != fpcurthread) + * and not necessarily for every context switch, but it is too hard to + * access foreign pcb's. + */ +int +npxdna(void) +{ + struct thread *td; + + if (!hw_float) + return (0); + td = curthread; + critical_enter(); + if (__predict_false(PCPU_GET(fpcurthread) == td)) { + /* + * Some virtual machines seems to set %cr0.TS at + * arbitrary moments. Silently clear the TS bit + * regardless of the eager/lazy FPU context switch + * mode. + */ + stop_emulating(); + } else { + if (__predict_false(PCPU_GET(fpcurthread) != NULL)) { + printf( + "npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", + PCPU_GET(fpcurthread), + PCPU_GET(fpcurthread)->td_proc->p_pid, + td, td->td_proc->p_pid); + panic("npxdna"); + } + restore_npx_curthread(td, td->td_pcb); + } + critical_exit(); return (1); } @@ -869,8 +884,20 @@ npxsave(addr) xsaveopt((char *)addr, xsave_mask); else fpusave(addr); - start_emulating(); - PCPU_SET(fpcurthread, NULL); +} + +void npxswitch(struct thread *td, struct pcb *pcb); +void +npxswitch(struct thread *td, struct pcb *pcb) +{ + + if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || + !PCB_USER_FPU(pcb)) { + start_emulating(); + PCPU_SET(fpcurthread, NULL); + } else if (PCPU_GET(fpcurthread) != td) { + restore_npx_curthread(td, pcb); + } } /*