Date: Wed, 10 Apr 2013 05:59:07 +0000 (UTC) From: Neel Natu <neel@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r249324 - in head: sys/amd64/include sys/amd64/vmm sys/x86/x86 usr.sbin/bhyve Message-ID: <201304100559.r3A5x7PF004306@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: neel Date: Wed Apr 10 05:59:07 2013 New Revision: 249324 URL: http://svnweb.freebsd.org/changeset/base/249324 Log: Unsynchronized TSCs on the host require special handling in bhyve: - use clock_gettime(2) as the time base for the emulated ACPI timer instead of directly using rdtsc(). - don't advertise the invariant TSC capability to the guest to discourage it from using the TSC as its time base. Discussed with: jhb@ (about making 'smp_tsc' a global) Reported by: Dan Mack on freebsd-virtualization@ Obtained from: NetApp Modified: head/sys/amd64/include/clock.h head/sys/amd64/vmm/x86.c head/sys/x86/x86/tsc.c head/usr.sbin/bhyve/pmtmr.c Modified: head/sys/amd64/include/clock.h ============================================================================== --- head/sys/amd64/include/clock.h Wed Apr 10 02:40:03 2013 (r249323) +++ head/sys/amd64/include/clock.h Wed Apr 10 05:59:07 2013 (r249324) @@ -20,6 +20,9 @@ extern int i8254_max_count; extern uint64_t tsc_freq; extern int tsc_is_invariant; extern int tsc_perf_stat; +#ifdef SMP +extern int smp_tsc; +#endif void i8254_init(void); Modified: head/sys/amd64/vmm/x86.c ============================================================================== --- head/sys/amd64/vmm/x86.c Wed Apr 10 02:40:03 2013 (r249323) +++ head/sys/amd64/vmm/x86.c Wed Apr 10 05:59:07 2013 (r249324) @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/cpuset.h> +#include <machine/clock.h> #include <machine/cpufunc.h> #include <machine/md_var.h> #include <machine/specialreg.h> @@ -89,11 +90,27 @@ x86_emulate_cpuid(struct vm *vm, int vcp case CPUID_8000_0003: case CPUID_8000_0004: case CPUID_8000_0006: - case CPUID_8000_0007: case CPUID_8000_0008: cpuid_count(*eax, *ecx, regs); break; + case CPUID_8000_0007: + cpuid_count(*eax, *ecx, regs); + /* + * If the host TSCs are not synchronized across + * physical cpus then we cannot advertise an + * invariant tsc to a vcpu. + * + * XXX This still falls short because the vcpu + * can observe the TSC moving backwards as it + * migrates across physical cpus. But at least + * it should discourage the guest from using the + * TSC to keep track of time. + */ + if (!smp_tsc) + regs[3] &= ~AMDPM_TSC_INVARIANT; + break; + case CPUID_0000_0001: do_cpuid(1, regs); Modified: head/sys/x86/x86/tsc.c ============================================================================== --- head/sys/x86/x86/tsc.c Wed Apr 10 02:40:03 2013 (r249323) +++ head/sys/x86/x86/tsc.c Wed Apr 10 05:59:07 2013 (r249324) @@ -61,7 +61,7 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant); #ifdef SMP -static int smp_tsc; +int smp_tsc; SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, "Indicates whether the TSC is safe to use in SMP mode"); TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc); Modified: head/usr.sbin/bhyve/pmtmr.c ============================================================================== --- head/usr.sbin/bhyve/pmtmr.c Wed Apr 10 02:40:03 2013 (r249323) +++ head/usr.sbin/bhyve/pmtmr.c Wed Apr 10 05:59:07 2013 (r249324) @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <machine/cpufunc.h> #include <stdio.h> +#include <stdlib.h> #include <time.h> #include <assert.h> #include <pthread.h> @@ -53,35 +54,108 @@ __FBSDID("$FreeBSD$"); #define PMTMR_FREQ 3579545 /* 3.579545MHz */ static pthread_mutex_t pmtmr_mtx; -static uint64_t pmtmr_tscf; + static uint64_t pmtmr_old; + +static uint64_t pmtmr_tscf; static uint64_t pmtmr_tsc_old; +static clockid_t clockid = CLOCK_UPTIME_FAST; +static struct timespec pmtmr_uptime_old; + +#define timespecsub(vvp, uvp) \ + do { \ + (vvp)->tv_sec -= (uvp)->tv_sec; \ + (vvp)->tv_nsec -= (uvp)->tv_nsec; \ + if ((vvp)->tv_nsec < 0) { \ + (vvp)->tv_sec--; \ + (vvp)->tv_nsec += 1000000000; \ + } \ + } while (0) + +static uint64_t +timespec_to_pmtmr(const struct timespec *tsnew, const struct timespec *tsold) +{ + struct timespec tsdiff; + int64_t nsecs; + + tsdiff = *tsnew; + timespecsub(&tsdiff, tsold); + nsecs = tsdiff.tv_sec * 1000000000 + tsdiff.tv_nsec; + assert(nsecs >= 0); + + return (nsecs * PMTMR_FREQ / 1000000000 + pmtmr_old); +} + +static uint64_t +tsc_to_pmtmr(uint64_t tsc_new, uint64_t tsc_old) +{ + + return ((tsc_new - tsc_old) * PMTMR_FREQ / pmtmr_tscf + pmtmr_old); +} + +static void +pmtmr_init(void) +{ + size_t len; + int smp_tsc, err; + struct timespec tsnew, tsold = { 0 }; + + len = sizeof(smp_tsc); + err = sysctlbyname("kern.timecounter.smp_tsc", &smp_tsc, &len, NULL, 0); + assert(err == 0); + + if (smp_tsc) { + len = sizeof(pmtmr_tscf); + err = sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len, + NULL, 0); + assert(err == 0); + + pmtmr_tsc_old = rdtsc(); + pmtmr_old = tsc_to_pmtmr(pmtmr_tsc_old, 0); + } else { + if (getenv("BHYVE_PMTMR_PRECISE") != NULL) + clockid = CLOCK_UPTIME; + + err = clock_gettime(clockid, &tsnew); + assert(err == 0); + + pmtmr_uptime_old = tsnew; + pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold); + } +} + static uint32_t pmtmr_val(void) { + struct timespec tsnew; uint64_t pmtmr_tsc_new; uint64_t pmtmr_new; + int error; + static int inited = 0; if (!inited) { - size_t len; - - inited = 1; pthread_mutex_init(&pmtmr_mtx, NULL); - len = sizeof(pmtmr_tscf); - sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len, - NULL, 0); - pmtmr_tsc_old = rdtsc(); - pmtmr_old = pmtmr_tsc_old / pmtmr_tscf * PMTMR_FREQ; + pmtmr_init(); + inited = 1; } pthread_mutex_lock(&pmtmr_mtx); - pmtmr_tsc_new = rdtsc(); - pmtmr_new = (pmtmr_tsc_new - pmtmr_tsc_old) * PMTMR_FREQ / pmtmr_tscf + - pmtmr_old; + + if (pmtmr_tscf) { + pmtmr_tsc_new = rdtsc(); + pmtmr_new = tsc_to_pmtmr(pmtmr_tsc_new, pmtmr_tsc_old); + pmtmr_tsc_old = pmtmr_tsc_new; + } else { + error = clock_gettime(clockid, &tsnew); + assert(error == 0); + + pmtmr_new = timespec_to_pmtmr(&tsnew, &pmtmr_uptime_old); + pmtmr_uptime_old = tsnew; + } pmtmr_old = pmtmr_new; - pmtmr_tsc_old = pmtmr_tsc_new; + pthread_mutex_unlock(&pmtmr_mtx); return (pmtmr_new); @@ -102,4 +176,3 @@ pmtmr_handler(struct vmctx *ctx, int vcp } INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler); -
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201304100559.r3A5x7PF004306>