From owner-svn-src-head@freebsd.org Wed Jun 13 17:55:11 2018 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id DBE1C1000B64; Wed, 13 Jun 2018 17:55:10 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 8D51870B41; Wed, 13 Jun 2018 17:55:10 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 6E9C71CD66; Wed, 13 Jun 2018 17:55:10 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w5DHtAPZ073316; Wed, 13 Jun 2018 17:55:10 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w5DHtAwo073314; Wed, 13 Jun 2018 17:55:10 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201806131755.w5DHtAwo073314@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Wed, 13 Jun 2018 17:55:10 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r335072 - head/sys/amd64/amd64 X-SVN-Group: head X-SVN-Commit-Author: kib X-SVN-Commit-Paths: head/sys/amd64/amd64 X-SVN-Commit-Revision: 335072 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.26 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 13 Jun 2018 17:55:11 -0000 Author: kib Date: Wed Jun 13 17:55:09 2018 New Revision: 335072 URL: https://svnweb.freebsd.org/changeset/base/335072 Log: Enable eager FPU context switch by default on amd64. With compilers making increasing use of vector instructions the performance benefit of lazily switching FPU state is no longer a desirable tradeoff. Linux switched to eager FPU context switch some time ago, and the idea was floated on the FreeBSD-current mailing list some years ago[1]. Enable eager FPU context switch by default on amd64, with a tunable/sysctl available to turn it back off. [1] https://lists.freebsd.org/pipermail/freebsd-current/2015-March/055198.html Reviewed by: jhb Tested by: pho Sponsored by: The FreeBSD Foundation Modified: head/sys/amd64/amd64/cpu_switch.S head/sys/amd64/amd64/fpu.c Modified: head/sys/amd64/amd64/cpu_switch.S ============================================================================== --- head/sys/amd64/amd64/cpu_switch.S Wed Jun 13 17:42:55 2018 (r335071) +++ head/sys/amd64/amd64/cpu_switch.S Wed Jun 13 17:55:09 2018 (r335072) @@ -128,10 +128,10 @@ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) - jne 3f + jne 2f movq PCB_SAVEFPU(%r8),%r8 clts - cmpl $0,use_xsave + cmpl $0,use_xsave(%rip) jne 1f fxsave (%r8) jmp 2f @@ -143,12 +143,7 @@ ctx_switch_xsave: /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */ xsave (%r8) movq %rcx,%rdx -2: smsw %ax - orb $CR0_TS,%al - lmsw %ax - xorl %eax,%eax - movq %rax,PCPU(FPCURTHREAD) -3: +2: /* Save is done. Now fire up new thread. Leave old vmspace. */ movq %rsi,%r12 movq %rdi,%r13 @@ -238,6 +233,8 @@ done_load_dr: movq PCB_RBX(%r8),%rbx movq PCB_RIP(%r8),%rax movq %rax,(%rsp) + movq PCPU(CURTHREAD),%rdi + call fpu_activate_sw ret /* Modified: head/sys/amd64/amd64/fpu.c ============================================================================== --- head/sys/amd64/amd64/fpu.c Wed Jun 13 17:42:55 2018 (r335071) +++ head/sys/amd64/amd64/fpu.c Wed Jun 13 17:55:09 2018 (r335072) @@ -142,6 +142,11 @@ static void fpu_clean_state(void); SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware"); +int lazy_fpu_switch = 0; +SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH, + &lazy_fpu_switch, 0, + "Lazily load FPU context after context switch"); + int use_xsave; /* non-static for cpu_switch.S */ uint64_t xsave_mask; /* the same */ static uma_zone_t fpu_save_area_zone; @@ -242,6 +247,7 @@ fpuinit_bsp1(void) uint64_t xsave_mask_user; bool old_wp; + TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); if (!use_xsave) return; cpuid_count(0xd, 0x0, cp); @@ -651,6 +657,45 @@ fputrap_sse(void) return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); } +static void +restore_fpu_curthread(struct thread *td) +{ + struct pcb *pcb; + + /* + * Record new context early in case frstor causes a trap. + */ + PCPU_SET(fpcurthread, td); + + stop_emulating(); + fpu_clean_state(); + pcb = td->td_pcb; + + if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { + /* + * This is the first time this thread has used the FPU or + * the PCB doesn't contain a clean FPU state. Explicitly + * load an initial state. + * + * We prefer to restore the state from the actual save + * area in PCB instead of directly loading from + * fpu_initialstate, to ignite the XSAVEOPT + * tracking engine. + */ + bcopy(fpu_initialstate, pcb->pcb_save, + cpu_max_ext_state_size); + fpurestore(pcb->pcb_save); + if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) + fldcw(pcb->pcb_initial_fpucw); + if (PCB_USER_FPU(pcb)) + set_pcb_flags(pcb, PCB_FPUINITDONE | + PCB_USERFPUINITDONE); + else + set_pcb_flags(pcb, PCB_FPUINITDONE); + } else + fpurestore(pcb->pcb_save); +} + /* * Device Not Available (DNA, #NM) exception handler. * @@ -661,7 +706,9 @@ fputrap_sse(void) void fpudna(void) { + struct thread *td; + td = curthread; /* * This handler is entered with interrupts enabled, so context * switches may occur before critical_enter() is executed. If @@ -675,7 +722,7 @@ fpudna(void) KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); - if (PCPU_GET(fpcurthread) == curthread) { + if (PCPU_GET(fpcurthread) == td) { printf("fpudna: fpcurthread == curthread\n"); stop_emulating(); critical_exit(); @@ -684,40 +731,24 @@ fpudna(void) if (PCPU_GET(fpcurthread) != NULL) { panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid, - curthread, curthread->td_tid); + td, td->td_tid); } - stop_emulating(); - /* - * Record new context early in case frstor causes a trap. - */ - PCPU_SET(fpcurthread, curthread); + restore_fpu_curthread(td); + critical_exit(); +} - fpu_clean_state(); +void fpu_activate_sw(struct thread *td); /* Called from the context switch */ +void +fpu_activate_sw(struct thread *td) +{ - if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) { - /* - * This is the first time this thread has used the FPU or - * the PCB doesn't contain a clean FPU state. Explicitly - * load an initial state. - * - * We prefer to restore the state from the actual save - * area in PCB instead of directly loading from - * fpu_initialstate, to ignite the XSAVEOPT - * tracking engine. - */ - bcopy(fpu_initialstate, curpcb->pcb_save, - cpu_max_ext_state_size); - fpurestore(curpcb->pcb_save); - if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__) - fldcw(curpcb->pcb_initial_fpucw); - if (PCB_USER_FPU(curpcb)) - set_pcb_flags(curpcb, - PCB_FPUINITDONE | PCB_USERFPUINITDONE); - else - set_pcb_flags(curpcb, PCB_FPUINITDONE); - } else - fpurestore(curpcb->pcb_save); - critical_exit(); + if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || + !PCB_USER_FPU(td->td_pcb)) { + PCPU_SET(fpcurthread, NULL); + start_emulating(); + } else if (PCPU_GET(fpcurthread) != td) { + restore_fpu_curthread(td); + } } void