Date: Thu, 8 Nov 2018 22:42:56 +0000 (UTC) From: John Baldwin <jhb@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r340270 - in stable/11/sys: amd64/include arm/include arm64/include i386/include kern mips/include powerpc/include riscv/include sparc64/include x86/x86 Message-ID: <201811082242.wA8MguJu018222@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: jhb Date: Thu Nov 8 22:42:55 2018 New Revision: 340270 URL: https://svnweb.freebsd.org/changeset/base/340270 Log: MFC 340164,340168,340170: Add custom cpu_lock_delay() for x86. 340164: Add a KPI for the delay while spinning on a spin lock. Replace a call to DELAY(1) with a new cpu_lock_delay() KPI. Currently cpu_lock_delay() is defined to DELAY(1) on all platforms. However, platforms with a DELAY() implementation that uses spin locks should implement a custom cpu_lock_delay() that doesn't use locks. 340168: Add a delay_tsc() static function for when DELAY() uses the TSC. This uses slightly simpler logic than the existing code by using the full 64-bit counter and thus not having to worry about counter overflow. 340170: Add a custom implementation of cpu_lock_delay() for x86. Avoid using DELAY() since it can try to use spin locks on CPUs without a P-state invariant TSC. For cpu_lock_delay(), always use the TSC if it exists (even if it is not P-state invariant) to delay for a microsecond. If the TSC does not exist, read from I/O port 0x84 to delay instead. 
PR: 228768 Modified: stable/11/sys/amd64/include/cpu.h stable/11/sys/arm/include/cpu.h stable/11/sys/arm64/include/cpu.h stable/11/sys/i386/include/cpu.h stable/11/sys/kern/kern_mutex.c stable/11/sys/mips/include/cpu.h stable/11/sys/powerpc/include/cpu.h stable/11/sys/riscv/include/cpu.h stable/11/sys/sparc64/include/cpu.h stable/11/sys/x86/x86/delay.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/amd64/include/cpu.h ============================================================================== --- stable/11/sys/amd64/include/cpu.h Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/amd64/include/cpu.h Thu Nov 8 22:42:55 2018 (r340270) @@ -73,6 +73,7 @@ extern char etext[]; extern void (*vmm_resume_p)(void); void cpu_halt(void); +void cpu_lock_delay(void); void cpu_reset(void); void fork_trampoline(void); void swi_vm(void *); Modified: stable/11/sys/arm/include/cpu.h ============================================================================== --- stable/11/sys/arm/include/cpu.h Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/arm/include/cpu.h Thu Nov 8 22:42:55 2018 (r340270) @@ -61,6 +61,7 @@ get_cyclecount(void) #define cpu_getstack(td) ((td)->td_frame->tf_usr_sp) #define cpu_setstack(td, sp) ((td)->td_frame->tf_usr_sp = (sp)) #define cpu_spinwait() /* nothing */ +#define cpu_lock_delay() DELAY(1) #define ARM_NVEC 8 #define ARM_VEC_ALL 0xffffffff Modified: stable/11/sys/arm64/include/cpu.h ============================================================================== --- stable/11/sys/arm64/include/cpu.h Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/arm64/include/cpu.h Thu Nov 8 22:42:55 2018 (r340270) @@ -51,6 +51,7 @@ #define cpu_getstack(td) ((td)->td_frame->tf_sp) #define cpu_setstack(td, sp) ((td)->td_frame->tf_sp = (sp)) #define cpu_spinwait() __asm __volatile("yield" ::: "memory") +#define cpu_lock_delay() DELAY(1) /* Extract CPU affinity levels 0-3 */ #define CPU_AFF0(mpidr) (u_int)(((mpidr) >> 0) & 0xff) Modified: 
stable/11/sys/i386/include/cpu.h ============================================================================== --- stable/11/sys/i386/include/cpu.h Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/i386/include/cpu.h Thu Nov 8 22:42:55 2018 (r340270) @@ -70,6 +70,7 @@ extern char btext[]; extern char etext[]; void cpu_halt(void); +void cpu_lock_delay(void); void cpu_reset(void); void fork_trampoline(void); void swi_vm(void *); Modified: stable/11/sys/kern/kern_mutex.c ============================================================================== --- stable/11/sys/kern/kern_mutex.c Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/kern/kern_mutex.c Thu Nov 8 22:42:55 2018 (r340270) @@ -1204,7 +1204,7 @@ _mtx_lock_indefinite_check(struct mtx *m, struct lock_ ldap->spin_cnt++; if (ldap->spin_cnt < 60000000 || kdb_active || panicstr != NULL) - DELAY(1); + cpu_lock_delay(); else { td = mtx_owner(m); Modified: stable/11/sys/mips/include/cpu.h ============================================================================== --- stable/11/sys/mips/include/cpu.h Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/mips/include/cpu.h Thu Nov 8 22:42:55 2018 (r340270) @@ -69,6 +69,7 @@ #define cpu_getstack(td) ((td)->td_frame->sp) #define cpu_setstack(td, nsp) ((td)->td_frame->sp = (nsp)) #define cpu_spinwait() /* nothing */ +#define cpu_lock_delay() DELAY(1) /* * A machine-independent interface to the CPU's counter. 
Modified: stable/11/sys/powerpc/include/cpu.h ============================================================================== --- stable/11/sys/powerpc/include/cpu.h Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/powerpc/include/cpu.h Thu Nov 8 22:42:55 2018 (r340270) @@ -101,6 +101,7 @@ get_cyclecount(void) #define cpu_getstack(td) ((td)->td_frame->fixreg[1]) #define cpu_spinwait() __asm __volatile("or 27,27,27") /* yield */ +#define cpu_lock_delay() DELAY(1) extern char btext[]; extern char etext[]; Modified: stable/11/sys/riscv/include/cpu.h ============================================================================== --- stable/11/sys/riscv/include/cpu.h Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/riscv/include/cpu.h Thu Nov 8 22:42:55 2018 (r340270) @@ -46,6 +46,7 @@ #define cpu_getstack(td) ((td)->td_frame->tf_sp) #define cpu_setstack(td, sp) ((td)->td_frame->tf_sp = (sp)) #define cpu_spinwait() /* nothing */ +#define cpu_lock_delay() DELAY(1) #ifdef _KERNEL Modified: stable/11/sys/sparc64/include/cpu.h ============================================================================== --- stable/11/sys/sparc64/include/cpu.h Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/sparc64/include/cpu.h Thu Nov 8 22:42:55 2018 (r340270) @@ -46,6 +46,7 @@ #define cpu_getstack(td) ((td)->td_frame->tf_sp) #define cpu_setstack(td, sp) ((td)->td_frame->tf_sp = (sp)) #define cpu_spinwait() /* nothing */ +#define cpu_lock_delay() DELAY(1) #ifdef _KERNEL Modified: stable/11/sys/x86/x86/delay.c ============================================================================== --- stable/11/sys/x86/x86/delay.c Thu Nov 8 22:39:38 2018 (r340269) +++ stable/11/sys/x86/x86/delay.c Thu Nov 8 22:42:55 2018 (r340270) @@ -49,11 +49,23 @@ __FBSDID("$FreeBSD$"); #include <machine/cpu.h> #include <x86/init.h> -static u_int -get_tsc(__unused struct timecounter *tc) +static void +delay_tsc(int n) { + uint64_t end, now; - return (rdtsc32()); + /* + * Pin the current thread ensure 
correct behavior if the TSCs + * on different CPUs are not in sync. + */ + sched_pin(); + now = rdtsc(); + end = now + tsc_freq * n / 1000000; + do { + cpu_spinwait(); + now = rdtsc(); + } while (now < end); + sched_unpin(); } static int @@ -64,22 +76,24 @@ delay_tc(int n) uint64_t end, freq, now; u_int last, mask, u; - tc = timecounter; - freq = atomic_load_acq_64(&tsc_freq); - if (tsc_is_invariant && freq != 0) { - func = get_tsc; - mask = ~0u; - } else { - if (tc->tc_quality <= 0) - return (0); - func = tc->tc_get_timecount; - mask = tc->tc_counter_mask; - freq = tc->tc_frequency; + /* + * Only use the TSC if it is P-state invariant. If the TSC is + * not P-state invariant and the CPU is not running at the + * "full" P-state, then the TSC will increment at some rate + * less than tsc_freq and delay_tsc() will wait too long. + */ + if (tsc_is_invariant && tsc_freq != 0) { + delay_tsc(n); + return (1); } + tc = timecounter; + if (tc->tc_quality <= 0) + return (0); + func = tc->tc_get_timecount; + mask = tc->tc_counter_mask; + freq = tc->tc_frequency; now = 0; end = freq * n / 1000000; - if (func == get_tsc) - sched_pin(); last = func(tc) & mask; do { cpu_spinwait(); @@ -90,8 +104,6 @@ delay_tc(int n) now += u - last; last = u; } while (now < end); - if (func == get_tsc) - sched_unpin(); return (1); } @@ -103,4 +115,23 @@ DELAY(int n) return; init_ops.early_delay(n); +} + +void +cpu_lock_delay(void) +{ + + /* + * Use TSC to wait for a usec if present, otherwise fall back + * to reading from port 0x84. We can't call into timecounters + * for this delay since timecounters might use spin locks. + * + * Note that unlike delay_tc(), this uses the TSC even if it + * is not P-state invariant. For this function it is ok to + * wait even a few usecs. + */ + if (tsc_freq != 0) + delay_tsc(1); + else + inb(0x84); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201811082242.wA8MguJu018222>