Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 8 Nov 2018 22:42:56 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r340270 - in stable/11/sys: amd64/include arm/include arm64/include i386/include kern mips/include powerpc/include riscv/include sparc64/include x86/x86
Message-ID:  <201811082242.wA8MguJu018222@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Thu Nov  8 22:42:55 2018
New Revision: 340270
URL: https://svnweb.freebsd.org/changeset/base/340270

Log:
  MFC 340164,340168,340170: Add custom cpu_lock_delay() for x86.
  
  340164:
  Add a KPI for the delay while spinning on a spin lock.
  
  Replace a call to DELAY(1) with a new cpu_lock_delay() KPI.  Currently
  cpu_lock_delay() is defined to DELAY(1) on all platforms.  However,
  platforms with a DELAY() implementation that uses spin locks should
  implement a custom cpu_lock_delay() that doesn't use locks.
  
  340168:
  Add a delay_tsc() static function for when DELAY() uses the TSC.
  
  This uses slightly simpler logic than the existing code by using the
  full 64-bit counter and thus not having to worry about counter
  overflow.
  
  340170:
  Add a custom implementation of cpu_lock_delay() for x86.
  
  Avoid using DELAY() since it can try to use spin locks on CPUs without
  a P-state invariant TSC.  For cpu_lock_delay(), always use the TSC if
  it exists (even if it is not P-state invariant) to delay for a
  microsecond.  If the TSC does not exist, read from I/O port 0x84 to
  delay instead.
  
  PR:		228768

Modified:
  stable/11/sys/amd64/include/cpu.h
  stable/11/sys/arm/include/cpu.h
  stable/11/sys/arm64/include/cpu.h
  stable/11/sys/i386/include/cpu.h
  stable/11/sys/kern/kern_mutex.c
  stable/11/sys/mips/include/cpu.h
  stable/11/sys/powerpc/include/cpu.h
  stable/11/sys/riscv/include/cpu.h
  stable/11/sys/sparc64/include/cpu.h
  stable/11/sys/x86/x86/delay.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/amd64/include/cpu.h
==============================================================================
--- stable/11/sys/amd64/include/cpu.h	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/amd64/include/cpu.h	Thu Nov  8 22:42:55 2018	(r340270)
@@ -73,6 +73,7 @@ extern char	etext[];
 extern	void (*vmm_resume_p)(void);
 
 void	cpu_halt(void);
+void	cpu_lock_delay(void);
 void	cpu_reset(void);
 void	fork_trampoline(void);
 void	swi_vm(void *);

Modified: stable/11/sys/arm/include/cpu.h
==============================================================================
--- stable/11/sys/arm/include/cpu.h	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/arm/include/cpu.h	Thu Nov  8 22:42:55 2018	(r340270)
@@ -61,6 +61,7 @@ get_cyclecount(void)
 #define cpu_getstack(td)	((td)->td_frame->tf_usr_sp)
 #define cpu_setstack(td, sp)	((td)->td_frame->tf_usr_sp = (sp))
 #define cpu_spinwait()		/* nothing */
+#define	cpu_lock_delay()	DELAY(1)
 
 #define ARM_NVEC		8
 #define ARM_VEC_ALL		0xffffffff

Modified: stable/11/sys/arm64/include/cpu.h
==============================================================================
--- stable/11/sys/arm64/include/cpu.h	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/arm64/include/cpu.h	Thu Nov  8 22:42:55 2018	(r340270)
@@ -51,6 +51,7 @@
 #define	cpu_getstack(td)	((td)->td_frame->tf_sp)
 #define	cpu_setstack(td, sp)	((td)->td_frame->tf_sp = (sp))
 #define	cpu_spinwait()		__asm __volatile("yield" ::: "memory")
+#define	cpu_lock_delay()	DELAY(1)
 
 /* Extract CPU affinity levels 0-3 */
 #define	CPU_AFF0(mpidr)	(u_int)(((mpidr) >> 0) & 0xff)

Modified: stable/11/sys/i386/include/cpu.h
==============================================================================
--- stable/11/sys/i386/include/cpu.h	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/i386/include/cpu.h	Thu Nov  8 22:42:55 2018	(r340270)
@@ -70,6 +70,7 @@ extern char	btext[];
 extern char	etext[];
 
 void	cpu_halt(void);
+void	cpu_lock_delay(void);
 void	cpu_reset(void);
 void	fork_trampoline(void);
 void	swi_vm(void *);

Modified: stable/11/sys/kern/kern_mutex.c
==============================================================================
--- stable/11/sys/kern/kern_mutex.c	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/kern/kern_mutex.c	Thu Nov  8 22:42:55 2018	(r340270)
@@ -1204,7 +1204,7 @@ _mtx_lock_indefinite_check(struct mtx *m, struct lock_
 
 	ldap->spin_cnt++;
 	if (ldap->spin_cnt < 60000000 || kdb_active || panicstr != NULL)
-		DELAY(1);
+		cpu_lock_delay();
 	else {
 		td = mtx_owner(m);
 

Modified: stable/11/sys/mips/include/cpu.h
==============================================================================
--- stable/11/sys/mips/include/cpu.h	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/mips/include/cpu.h	Thu Nov  8 22:42:55 2018	(r340270)
@@ -69,6 +69,7 @@
 #define	cpu_getstack(td)	((td)->td_frame->sp)
 #define	cpu_setstack(td, nsp)	((td)->td_frame->sp = (nsp))
 #define	cpu_spinwait()		/* nothing */
+#define	cpu_lock_delay()	DELAY(1)
 
 /*
  * A machine-independent interface to the CPU's counter.

Modified: stable/11/sys/powerpc/include/cpu.h
==============================================================================
--- stable/11/sys/powerpc/include/cpu.h	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/powerpc/include/cpu.h	Thu Nov  8 22:42:55 2018	(r340270)
@@ -101,6 +101,7 @@ get_cyclecount(void)
 
 #define	cpu_getstack(td)	((td)->td_frame->fixreg[1])
 #define	cpu_spinwait()		__asm __volatile("or 27,27,27") /* yield */
+#define	cpu_lock_delay()	DELAY(1)
 
 extern char btext[];
 extern char etext[];

Modified: stable/11/sys/riscv/include/cpu.h
==============================================================================
--- stable/11/sys/riscv/include/cpu.h	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/riscv/include/cpu.h	Thu Nov  8 22:42:55 2018	(r340270)
@@ -46,6 +46,7 @@
 #define	cpu_getstack(td)	((td)->td_frame->tf_sp)
 #define	cpu_setstack(td, sp)	((td)->td_frame->tf_sp = (sp))
 #define	cpu_spinwait()		/* nothing */
+#define	cpu_lock_delay()	DELAY(1)
 
 #ifdef _KERNEL
 

Modified: stable/11/sys/sparc64/include/cpu.h
==============================================================================
--- stable/11/sys/sparc64/include/cpu.h	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/sparc64/include/cpu.h	Thu Nov  8 22:42:55 2018	(r340270)
@@ -46,6 +46,7 @@
 #define	cpu_getstack(td)	((td)->td_frame->tf_sp)
 #define	cpu_setstack(td, sp)	((td)->td_frame->tf_sp = (sp))
 #define	cpu_spinwait()		/* nothing */
+#define	cpu_lock_delay()	DELAY(1)
 
 #ifdef _KERNEL
 

Modified: stable/11/sys/x86/x86/delay.c
==============================================================================
--- stable/11/sys/x86/x86/delay.c	Thu Nov  8 22:39:38 2018	(r340269)
+++ stable/11/sys/x86/x86/delay.c	Thu Nov  8 22:42:55 2018	(r340270)
@@ -49,11 +49,23 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpu.h>
 #include <x86/init.h>
 
-static u_int
-get_tsc(__unused struct timecounter *tc)
+static void
+delay_tsc(int n)
 {
+	uint64_t end, now;
 
-	return (rdtsc32());
+	/*
+	 * Pin the current thread to ensure correct behavior if the TSCs
+	 * on different CPUs are not in sync.
+	 */
+	sched_pin();
+	now = rdtsc();
+	end = now + tsc_freq * n / 1000000;
+	do {
+		cpu_spinwait();
+		now = rdtsc();
+	} while (now < end);
+	sched_unpin();
 }
 
 static int
@@ -64,22 +76,24 @@ delay_tc(int n)
 	uint64_t end, freq, now;
 	u_int last, mask, u;
 
-	tc = timecounter;
-	freq = atomic_load_acq_64(&tsc_freq);
-	if (tsc_is_invariant && freq != 0) {
-		func = get_tsc;
-		mask = ~0u;
-	} else {
-		if (tc->tc_quality <= 0)
-			return (0);
-		func = tc->tc_get_timecount;
-		mask = tc->tc_counter_mask;
-		freq = tc->tc_frequency;
+	/*
+	 * Only use the TSC if it is P-state invariant.  If the TSC is
+	 * not P-state invariant and the CPU is not running at the
+	 * "full" P-state, then the TSC will increment at some rate
+	 * less than tsc_freq and delay_tsc() will wait too long.
+	 */
+	if (tsc_is_invariant && tsc_freq != 0) {
+		delay_tsc(n);
+		return (1);
 	}
+	tc = timecounter;
+	if (tc->tc_quality <= 0)
+		return (0);
+	func = tc->tc_get_timecount;
+	mask = tc->tc_counter_mask;
+	freq = tc->tc_frequency;
 	now = 0;
 	end = freq * n / 1000000;
-	if (func == get_tsc)
-		sched_pin();
 	last = func(tc) & mask;
 	do {
 		cpu_spinwait();
@@ -90,8 +104,6 @@ delay_tc(int n)
 			now += u - last;
 		last = u;
 	} while (now < end);
-	if (func == get_tsc)
-		sched_unpin();
 	return (1);
 }
 
@@ -103,4 +115,23 @@ DELAY(int n)
 		return;
 
 	init_ops.early_delay(n);
+}
+
+void
+cpu_lock_delay(void)
+{
+
+	/*
+	 * Use TSC to wait for a usec if present, otherwise fall back
+	 * to reading from port 0x84.  We can't call into timecounters
+	 * for this delay since timecounters might use spin locks.
+	 *
+	 * Note that unlike delay_tc(), this uses the TSC even if it
+	 * is not P-state invariant.  For this function it is ok to
+	 * wait even a few usecs.
+	 */
+	if (tsc_freq != 0)
+		delay_tsc(1);
+	else
+		inb(0x84);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201811082242.wA8MguJu018222>