Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 13 Jun 2018 17:55:10 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r335072 - head/sys/amd64/amd64
Message-ID:  <201806131755.w5DHtAwo073314@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Wed Jun 13 17:55:09 2018
New Revision: 335072
URL: https://svnweb.freebsd.org/changeset/base/335072

Log:
  Enable eager FPU context switch by default on amd64.
  
  With compilers making increasing use of vector instructions the
  performance benefit of lazily switching FPU state is no longer a
  desirable tradeoff.  Linux switched to eager FPU context switch some
  time ago, and the idea was floated on the FreeBSD-current mailing list
  some years ago[1].
  
  Enable eager FPU context switch by default on amd64, with a tunable/sysctl
  available to turn it back off.
  
  [1] https://lists.freebsd.org/pipermail/freebsd-current/2015-March/055198.html
  
  Reviewed by:	jhb
  Tested by:	pho
  Sponsored by:	The FreeBSD Foundation

Modified:
  head/sys/amd64/amd64/cpu_switch.S
  head/sys/amd64/amd64/fpu.c

Modified: head/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- head/sys/amd64/amd64/cpu_switch.S	Wed Jun 13 17:42:55 2018	(r335071)
+++ head/sys/amd64/amd64/cpu_switch.S	Wed Jun 13 17:55:09 2018	(r335072)
@@ -128,10 +128,10 @@ done_store_dr:
 
 	/* have we used fp, and need a save? */
 	cmpq	%rdi,PCPU(FPCURTHREAD)
-	jne	3f
+	jne	2f
 	movq	PCB_SAVEFPU(%r8),%r8
 	clts
-	cmpl	$0,use_xsave
+	cmpl	$0,use_xsave(%rip)
 	jne	1f
 	fxsave	(%r8)
 	jmp	2f
@@ -143,12 +143,7 @@ ctx_switch_xsave:
 	/* This is patched to xsaveopt if supported, see fpuinit_bsp1() */
 	xsave	(%r8)
 	movq	%rcx,%rdx
-2:	smsw	%ax
-	orb	$CR0_TS,%al
-	lmsw	%ax
-	xorl	%eax,%eax
-	movq	%rax,PCPU(FPCURTHREAD)
-3:
+2:
 	/* Save is done.  Now fire up new thread. Leave old vmspace. */
 	movq	%rsi,%r12
 	movq	%rdi,%r13
@@ -238,6 +233,8 @@ done_load_dr:
 	movq	PCB_RBX(%r8),%rbx
 	movq	PCB_RIP(%r8),%rax
 	movq	%rax,(%rsp)
+	movq	PCPU(CURTHREAD),%rdi
+	call	fpu_activate_sw
 	ret
 
 	/*

Modified: head/sys/amd64/amd64/fpu.c
==============================================================================
--- head/sys/amd64/amd64/fpu.c	Wed Jun 13 17:42:55 2018	(r335071)
+++ head/sys/amd64/amd64/fpu.c	Wed Jun 13 17:55:09 2018	(r335072)
@@ -142,6 +142,11 @@ static	void	fpu_clean_state(void);
 SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
     SYSCTL_NULL_INT_PTR, 1, "Floating point instructions executed in hardware");
 
+int lazy_fpu_switch = 0;
+SYSCTL_INT(_hw, OID_AUTO, lazy_fpu_switch, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+    &lazy_fpu_switch, 0,
+    "Lazily load FPU context after context switch");
+
 int use_xsave;			/* non-static for cpu_switch.S */
 uint64_t xsave_mask;		/* the same */
 static	uma_zone_t fpu_save_area_zone;
@@ -242,6 +247,7 @@ fpuinit_bsp1(void)
 	uint64_t xsave_mask_user;
 	bool old_wp;
 
+	TUNABLE_INT_FETCH("hw.lazy_fpu_switch", &lazy_fpu_switch);
 	if (!use_xsave)
 		return;
 	cpuid_count(0xd, 0x0, cp);
@@ -651,6 +657,45 @@ fputrap_sse(void)
 	return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
 }
 
+static void
+restore_fpu_curthread(struct thread *td)
+{
+	struct pcb *pcb;
+
+	/*
+	 * Record new context early in case frstor causes a trap.
+	 */
+	PCPU_SET(fpcurthread, td);
+
+	stop_emulating();
+	fpu_clean_state();
+	pcb = td->td_pcb;
+
+	if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) {
+		/*
+		 * This is the first time this thread has used the FPU or
+		 * the PCB doesn't contain a clean FPU state.  Explicitly
+		 * load an initial state.
+		 *
+		 * We prefer to restore the state from the actual save
+		 * area in PCB instead of directly loading from
+		 * fpu_initialstate, to ignite the XSAVEOPT
+		 * tracking engine.
+		 */
+		bcopy(fpu_initialstate, pcb->pcb_save,
+		    cpu_max_ext_state_size);
+		fpurestore(pcb->pcb_save);
+		if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
+			fldcw(pcb->pcb_initial_fpucw);
+		if (PCB_USER_FPU(pcb))
+			set_pcb_flags(pcb, PCB_FPUINITDONE |
+			    PCB_USERFPUINITDONE);
+		else
+			set_pcb_flags(pcb, PCB_FPUINITDONE);
+	} else
+		fpurestore(pcb->pcb_save);
+}
+
 /*
  * Device Not Available (DNA, #NM) exception handler.
  *
@@ -661,7 +706,9 @@ fputrap_sse(void)
 void
 fpudna(void)
 {
+	struct thread *td;
 
+	td = curthread;
 	/*
 	 * This handler is entered with interrupts enabled, so context
 	 * switches may occur before critical_enter() is executed.  If
@@ -675,7 +722,7 @@ fpudna(void)
 
 	KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0,
 	    ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)"));
-	if (PCPU_GET(fpcurthread) == curthread) {
+	if (PCPU_GET(fpcurthread) == td) {
 		printf("fpudna: fpcurthread == curthread\n");
 		stop_emulating();
 		critical_exit();
@@ -684,40 +731,24 @@ fpudna(void)
 	if (PCPU_GET(fpcurthread) != NULL) {
 		panic("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n",
 		    PCPU_GET(fpcurthread), PCPU_GET(fpcurthread)->td_tid,
-		    curthread, curthread->td_tid);
+		    td, td->td_tid);
 	}
-	stop_emulating();
-	/*
-	 * Record new context early in case frstor causes a trap.
-	 */
-	PCPU_SET(fpcurthread, curthread);
+	restore_fpu_curthread(td);
+	critical_exit();
+}
 
-	fpu_clean_state();
+void fpu_activate_sw(struct thread *td); /* Called from the context switch */
+void
+fpu_activate_sw(struct thread *td)
+{
 
-	if ((curpcb->pcb_flags & PCB_FPUINITDONE) == 0) {
-		/*
-		 * This is the first time this thread has used the FPU or
-		 * the PCB doesn't contain a clean FPU state.  Explicitly
-		 * load an initial state.
-		 *
-		 * We prefer to restore the state from the actual save
-		 * area in PCB instead of directly loading from
-		 * fpu_initialstate, to ignite the XSAVEOPT
-		 * tracking engine.
-		 */
-		bcopy(fpu_initialstate, curpcb->pcb_save,
-		    cpu_max_ext_state_size);
-		fpurestore(curpcb->pcb_save);
-		if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__)
-			fldcw(curpcb->pcb_initial_fpucw);
-		if (PCB_USER_FPU(curpcb))
-			set_pcb_flags(curpcb,
-			    PCB_FPUINITDONE | PCB_USERFPUINITDONE);
-		else
-			set_pcb_flags(curpcb, PCB_FPUINITDONE);
-	} else
-		fpurestore(curpcb->pcb_save);
-	critical_exit();
+	if (lazy_fpu_switch || (td->td_pflags & TDP_KTHREAD) != 0 ||
+	    !PCB_USER_FPU(td->td_pcb)) {
+		PCPU_SET(fpcurthread, NULL);
+		start_emulating();
+	} else if (PCPU_GET(fpcurthread) != td) {
+		restore_fpu_curthread(td);
+	}
 }
 
 void



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201806131755.w5DHtAwo073314>