Date: Wed, 13 Jun 2018 20:26:02 +0200 From: Oliver Pinter <oliver.pinter@hardenedbsd.org> To: Konstantin Belousov <kib@freebsd.org> Cc: "src-committers@freebsd.org" <src-committers@freebsd.org>, "svn-src-all@freebsd.org" <svn-src-all@freebsd.org>, "svn-src-head@freebsd.org" <svn-src-head@freebsd.org> Subject: Re: svn commit: r335072 - head/sys/amd64/amd64 Message-ID: <CAPQ4ffvCRQPQomySzb=BynDbuoWdcK_AgCGiUmJfDyPLwQ1A-g@mail.gmail.com> In-Reply-To: <201806131755.w5DHtAwo073314@repo.freebsd.org> References: <201806131755.w5DHtAwo073314@repo.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
On Wednesday, June 13, 2018, Konstantin Belousov <kib@freebsd.org> wrote: > Author: kib > Date: Wed Jun 13 17:55:09 2018 > New Revision: 335072 > URL: https://svnweb.freebsd.org/changeset/base/335072 > > Log: > Enable eager FPU context switch by default on amd64. > > With compilers making increasing use of vector instructions the > performance benefit of lazily switching FPU state is no longer a > desirable tradeoff. Linux switched to eager FPU context switch some > time ago, and the idea was floated on the FreeBSD-current mailing list > some years ago[1]. > > Enable eager FPU context switch by default on amd64, with a > tunable/sysctl > available to turn it back off. > > [1] https://lists.freebsd.org/pipermail/freebsd-current/ > 2015-March/055198.html > > http://www.openwall.com/lists/oss-security/2018/06/13/7 > Reviewed by: jhb > Tested by: pho > Sponsored by: The FreeBSD Foundation > > Modified: > head/sys/amd64/amd64/cpu_switch.S > head/sys/amd64/amd64/fpu.c > > Modified: head/sys/amd64/amd64/cpu_switch.S > ============================================================ > ================== > --- head/sys/amd64/amd64/cpu_switch.S Wed Jun 13 17:42:55 2018 > (r335071) > +++ head/sys/amd64/amd64/cpu_switch.S Wed Jun 13 17:55:09 2018 > (r335072) > @@ -128,10 +128,10 @@ done_store_dr: > > /* have we used fp, and need a save? */ > cmpq %rdi,PCPU(FPCURTHREAD) > - jne 3f > + jne 2f > movq PCB_SAVEFPU(%r8),%r8 > clts > - cmpl $0,use_xsave > + cmpl $0,use_xsave(%rip) > jne 1f > fxsave (%r8) > jmp 2f > @@ -143,12 +143,7 @@ ctx_switch_xsave: > /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */ > xsave (%r8) > movq %rcx,%rdx > -2: smsw %ax > - orb $CR0_TS,%al > - lmsw %ax > - xorl %eax,%eax > - movq %rax,PCPU(FPCURTHREAD) > -3: > +2: > /* Save is done. Now fire up new thread. Leave old vmspace. */ > movq %rsi,%r12 > movq %rdi,%r13 > @@ -238,6 +233,8 @@ done_load_dr: > movq PCB_RBX(%r8),%rbx > movq PCB_RIP(%r8),%rax > movq %rax,(%rsp) > + movq PCPU(CURTHREAD),%rdi > + call fpu_activate_sw > ret > > /* > > Modified: head/sys/amd64/amd64/fpu.c > ============================================================ > ================== > --- head/sys/amd64/amd64/fpu.c Wed Jun 13 17:42:55 2018 (r335071) > +++ head/sys/amd64/amd64/fpu.c Wed Jun 13 17:55:09 2018 (r335072) > @@ -142,6 +142,11 @@ static void fpu_clean_state(void); > SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, > SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in > hardware"); > > +int lazy_fpu_switch = 0; > +SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | > CTLFLAG_NOFETCH, > + &lazy_fpu_switch, 0, > + "Lazily load FPU context after context switch"); > + > int use_xsave; /* non-static for cpu_switch.S */ > uint64_t xsave_mask; /* the same */ > static uma_zone_t fpu_save_area_zone; > @@ -242,6 +247,7 @@ fpuinit_bsp1(void) > uint64_t xsave_mask_user; > bool old_wp; > > + TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch); > if (!use_xsave) > return; > cpuid_count(0xd, 0x0, cp); > @@ -651,6 +657,45 @@ fputrap_sse(void) > return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]); > } > > +static void > +restore_fpu_curthread(struct thread *td) > +{ > + struct pcb *pcb; > + > + /* > + * Record new context early in case frstor causes a trap. > + */ > + PCPU_SET(fpcurthread, td); > + > + stop_emulating(); > + fpu_clean_state(); > + pcb = td->td_pcb; > + > + if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { > + /* > + * This is the first time this thread has used the FPU or > + * the PCB doesn't contain a clean FPU state. Explicitly > + * load an initial state. > + * > + * We prefer to restore the state from the actual save > + * area in PCB instead of directly loading from > + * fpu_initialstate, to ignite the XSAVEOPT > + * tracking engine. > + */ > + bcopy(fpu_initialstate, pcb->pcb_save, > + cpu_max_ext_state_size); > + fpurestore(pcb->pcb_save); > + if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) > + fldcw(pcb->pcb_initial_fpucw); > + if (PCB_USER_FPU(pcb)) > + set_pcb_flags(pcb, PCB_FPUINITDONE | > + PCB_USERFPUINITDONE); > + else > + set_pcb_flags(pcb, PCB_FPUINITDONE); > + } else > + fpurestore(pcb->pcb_save); > +} > + > /* > * Device Not Available (DNA, #NM) exception handler. > * > @@ -661,7 +706,9 @@ fputrap_sse(void) > void > fpudna(void) > { > + struct thread *td; > > + td = curthread; > /* > * This handler is entered with interrupts enabled, so context > * switches may occur before critical_enter() is executed. If > @@ -675,7 +722,7 @@ fpudna(void) > > KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, > ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); > - if (PCPU_GET(fpcurthread) == curthread) { > + if (PCPU_GET(fpcurthread) == td) { > printf("fpudna: fpcurthread == curthread\n"); > stop_emulating(); > critical_exit(); > @@ -684,40 +731,24 @@ fpudna(void) > if (PCPU_GET(fpcurthread) != NULL) { > panic("fpudna: fpcurthread = %p (%d), curthread = %p > (%d)\n", > PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid, > - curthread, curthread->td_tid); > + td, td->td_tid); > } > - stop_emulating(); > - /* > - * Record new context early in case frstor causes a trap. > - */ > - PCPU_SET(fpcurthread, curthread); > + restore_fpu_curthread(td); > + critical_exit(); > +} > > - fpu_clean_state(); > +void fpu_activate_sw(struct thread *td); /* Called from the context > switch */ > +void > +fpu_activate_sw(struct thread *td) > +{ > > - if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) { > - /* > - * This is the first time this thread has used the FPU or > - * the PCB doesn't contain a clean FPU state. Explicitly > - * load an initial state. > - * > - * We prefer to restore the state from the actual save > - * area in PCB instead of directly loading from > - * fpu_initialstate, to ignite the XSAVEOPT > - * tracking engine. > - */ > - bcopy(fpu_initialstate, curpcb->pcb_save, > - cpu_max_ext_state_size); > - fpurestore(curpcb->pcb_save); > - if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__) > - fldcw(curpcb->pcb_initial_fpucw); > - if (PCB_USER_FPU(curpcb)) > - set_pcb_flags(curpcb, > - PCB_FPUINITDONE | PCB_USERFPUINITDONE); > - else > - set_pcb_flags(curpcb, PCB_FPUINITDONE); > - } else > - fpurestore(curpcb->pcb_save); > - critical_exit(); > + if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 || > + !PCB_USER_FPU(td->td_pcb)) { > + PCPU_SET(fpcurthread, NULL); > + start_emulating(); > + } else if (PCPU_GET(fpcurthread) != td) { > + restore_fpu_curthread(td); > + } > } > > void > _______________________________________________ > svn-src-head@freebsd.org mailing list > https://lists.freebsd.org/mailman/listinfo/svn-src-head > To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org" >
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAPQ4ffvCRQPQomySzb=BynDbuoWdcK_AgCGiUmJfDyPLwQ1A-g>