Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 13 Jun 2018 20:26:02 +0200
From:      Oliver Pinter <oliver.pinter@hardenedbsd.org>
To:        Konstantin Belousov <kib@freebsd.org>
Cc:        "src-committers@freebsd.org" <src-committers@freebsd.org>,  "svn-src-all@freebsd.org" <svn-src-all@freebsd.org>,  "svn-src-head@freebsd.org" <svn-src-head@freebsd.org>
Subject:   Re: svn commit: r335072 - head/sys/amd64/amd64
Message-ID:  <CAPQ4ffvCRQPQomySzb=BynDbuoWdcK_AgCGiUmJfDyPLwQ1A-g@mail.gmail.com>
In-Reply-To: <201806131755.w5DHtAwo073314@repo.freebsd.org>
References:  <201806131755.w5DHtAwo073314@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On Wednesday, June 13, 2018, Konstantin Belousov <kib@freebsd.org> wrote:

> Author: kib
> Date: Wed Jun 13 17:55:09 2018
> New Revision: 335072
> URL: https://svnweb.freebsd.org/changeset/base/335072
>
> Log:
>   Enable eager FPU context switch by default on amd64.
>
>   With compilers making increasing use of vector instructions the
>   performance benefit of lazily switching FPU state is no longer a
>   desirable tradeoff.  Linux switched to eager FPU context switch some
>   time ago, and the idea was floated on the FreeBSD-current mailing list
>   some years ago[1].
>
>   Enable eager FPU context switch by default on amd64, with a
> tunable/sysctl
>   available to turn it back off.
>
>   [1] https://lists.freebsd.org/pipermail/freebsd-current/
> 2015-March/055198.html
>
>
http://www.openwall.com/lists/oss-security/2018/06/13/7



>   Reviewed by:  jhb
>   Tested by:    pho
>   Sponsored by: The FreeBSD Foundation
>
> Modified:
>   head/sys/amd64/amd64/cpu_switch.S
>   head/sys/amd64/amd64/fpu.c
>
> Modified: head/sys/amd64/amd64/cpu_switch.S
> ============================================================
> ==================
> --- head/sys/amd64/amd64/cpu_switch.S   Wed Jun 13 17:42:55 2018
> (r335071)
> +++ head/sys/amd64/amd64/cpu_switch.S   Wed Jun 13 17:55:09 2018
> (r335072)
> @@ -128,10 +128,10 @@ done_store_dr:
>
>         /* have we used fp, and need a save? */
>         cmpq    %rdi,PCPU(FPCURTHREAD)
> -       jne     3f
> +       jne     2f
>         movq    PCB_SAVEFPU(%r8),%r8
>         clts
> -       cmpl    $0,use_xsave
> +       cmpl    $0,use_xsave(%rip)
>         jne     1f
>         fxsave  (%r8)
>         jmp     2f
> @@ -143,12 +143,7 @@ ctx_switch_xsave:
>         /* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
>         xsave   (%r8)
>         movq    %rcx,%rdx
> -2:     smsw    %ax
> -       orb     $CR0_TS,%al
> -       lmsw    %ax
> -       xorl    %eax,%eax
> -       movq    %rax,PCPU(FPCURTHREAD)
> -3:
> +2:
>         /* Save is done.  Now fire up new thread. Leave old vmspace. */
>         movq    %rsi,%r12
>         movq    %rdi,%r13
> @@ -238,6 +233,8 @@ done_load_dr:
>         movq    PCB_RBX(%r8),%rbx
>         movq    PCB_RIP(%r8),%rax
>         movq    %rax,(%rsp)
> +       movq    PCPU(CURTHREAD),%rdi
> +       call    fpu_activate_sw
>         ret
>
>         /*
>
> Modified: head/sys/amd64/amd64/fpu.c
> ============================================================
> ==================
> --- head/sys/amd64/amd64/fpu.c  Wed Jun 13 17:42:55 2018        (r335071)
> +++ head/sys/amd64/amd64/fpu.c  Wed Jun 13 17:55:09 2018        (r335072)
> @@ -142,6 +142,11 @@ static     void    fpu_clean_state(void);
>  SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
>      SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in
> hardware");
>
> +int lazy_fpu_switch = 0;
> +SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN |
> CTLFLAG_NOFETCH,
> +    &lazy_fpu_switch, 0,
> +    "Lazily load FPU context after context switch");
> +
>  int use_xsave;                 /* non-static for cpu_switch.S */
>  uint64_t xsave_mask;           /* the same */
>  static uma_zone_t fpu_save_area_zone;
> @@ -242,6 +247,7 @@ fpuinit_bsp1(void)
>         uint64_t xsave_mask_user;
>         bool old_wp;
>
> +       TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
>         if (!use_xsave)
>                 return;
>         cpuid_count(0xd, 0x0, cp);
> @@ -651,6 +657,45 @@ fputrap_sse(void)
>         return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
>  }
>
> +static void
> +restore_fpu_curthread(struct thread *td)
> +{
> +       struct pcb *pcb;
> +
> +       /*
> +        * Record new context early in case frstor causes a trap.
> +        */
> +       PCPU_SET(fpcurthread, td);
> +
> +       stop_emulating();
> +       fpu_clean_state();
> +       pcb = td->td_pcb;
> +
> +       if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
> +               /*
> +                * This is the first time this thread has used the FPU or
> +                * the PCB doesn't contain a clean FPU state.  Explicitly
> +                * load an initial state.
> +                *
> +                * We prefer to restore the state from the actual save
> +                * area in PCB instead of directly loading from
> +                * fpu_initialstate, to ignite the XSAVEOPT
> +                * tracking engine.
> +                */
> +               bcopy(fpu_initialstate, pcb->pcb_save,
> +                   cpu_max_ext_state_size);
> +               fpurestore(pcb->pcb_save);
> +               if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
> +                       fldcw(pcb->pcb_initial_fpucw);
> +               if (PCB_USER_FPU(pcb))
> +                       set_pcb_flags(pcb, PCB_FPUINITDONE |
> +                           PCB_USERFPUINITDONE);
> +               else
> +                       set_pcb_flags(pcb, PCB_FPUINITDONE);
> +       } else
> +               fpurestore(pcb->pcb_save);
> +}
> +
>  /*
>   * Device Not Available (DNA, #NM) exception handler.
>   *
> @@ -661,7 +706,9 @@ fputrap_sse(void)
>  void
>  fpudna(void)
>  {
> +       struct thread *td;
>
> +       td = curthread;
>         /*
>          * This handler is entered with interrupts enabled, so context
>          * switches may occur before critical_enter() is executed.  If
> @@ -675,7 +722,7 @@ fpudna(void)
>
>         KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0,
>             ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
> -       if (PCPU_GET(fpcurthread) == curthread) {
> +       if (PCPU_GET(fpcurthread) == td) {
>                 printf("fpudna: fpcurthread == curthread\n");
>                 stop_emulating();
>                 critical_exit();
> @@ -684,40 +731,24 @@ fpudna(void)
>         if (PCPU_GET(fpcurthread) != NULL) {
>                 panic("fpudna: fpcurthread = %p (%d), curthread = %p
> (%d)\n",
>                     PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid,
> -                   curthread, curthread->td_tid);
> +                   td, td->td_tid);
>         }
> -       stop_emulating();
> -       /*
> -        * Record new context early in case frstor causes a trap.
> -        */
> -       PCPU_SET(fpcurthread, curthread);
> +       restore_fpu_curthread(td);
> +       critical_exit();
> +}
>
> -       fpu_clean_state();
> +void fpu_activate_sw(struct thread *td); /* Called from the context
> switch */
> +void
> +fpu_activate_sw(struct thread *td)
> +{
>
> -       if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
> -               /*
> -                * This is the first time this thread has used the FPU or
> -                * the PCB doesn't contain a clean FPU state.  Explicitly
> -                * load an initial state.
> -                *
> -                * We prefer to restore the state from the actual save
> -                * area in PCB instead of directly loading from
> -                * fpu_initialstate, to ignite the XSAVEOPT
> -                * tracking engine.
> -                */
> -               bcopy(fpu_initialstate, curpcb->pcb_save,
> -                   cpu_max_ext_state_size);
> -               fpurestore(curpcb->pcb_save);
> -               if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
> -                       fldcw(curpcb->pcb_initial_fpucw);
> -               if (PCB_USER_FPU(curpcb))
> -                       set_pcb_flags(curpcb,
> -                           PCB_FPUINITDONE | PCB_USERFPUINITDONE);
> -               else
> -                       set_pcb_flags(curpcb, PCB_FPUINITDONE);
> -       } else
> -               fpurestore(curpcb->pcb_save);
> -       critical_exit();
> +       if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 ||
> +           !PCB_USER_FPU(td->td_pcb)) {
> +               PCPU_SET(fpcurthread, NULL);
> +               start_emulating();
> +       } else if (PCPU_GET(fpcurthread) != td) {
> +               restore_fpu_curthread(td);
> +       }
>  }
>
>  void
> _______________________________________________
> svn-src-head@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"
>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAPQ4ffvCRQPQomySzb=BynDbuoWdcK_AgCGiUmJfDyPLwQ1A-g>