From owner-freebsd-arch@FreeBSD.ORG Mon Apr 20 16:21:59 2015
Date: Mon, 20 Apr 2015 19:21:50 +0300
From: Konstantin Belousov <kostikbel@gmail.com>
To: arch@freebsd.org, amd64@freebsd.org
Subject: Move x86 idle code to the x86/ common place.
Message-ID: <20150420162149.GE2390@kib.kiev.ua>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
User-Agent: Mutt/1.5.23 (2014-03-12)

Below is the patch which unifies some code from
sys/{amd64/amd64,i386/i386}/machdep.c into the new shared file
sys/x86/x86/cpu_machdep.c.  Most of the moved code is related to handling
the idle CPU state, but there are some additional trivialities like
cpu_boot() etc.

The move is mostly a preparation for some other changes to the idle
infrastructure; I did not want to make the same changes twice.

Make universe passed with the patch, and I successfully booted a debug
amd64 kernel and a UP i386 kernel.

Comments?

diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 4c20e4f..3230937 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -578,375 +578,6 @@ freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
 }
 #endif
 
-
-/*
- * Machine dependent boot() routine
- *
- * I haven't seen anything to put here yet
- * Possibly some stuff might be grafted back here from boot()
- */
-void
-cpu_boot(int howto)
-{
-}
-
-/*
- * Flush the D-cache for non-DMA I/O so that the I-cache can
- * be made coherent later.
- */
-void
-cpu_flush_dcache(void *ptr, size_t len)
-{
-	/* Not applicable */
-}
-
-/* Get current clock frequency for the given cpu id. */
-int
-cpu_est_clockrate(int cpu_id, uint64_t *rate)
-{
-	uint64_t tsc1, tsc2;
-	uint64_t acnt, mcnt, perf;
-	register_t reg;
-
-	if (pcpu_find(cpu_id) == NULL || rate == NULL)
-		return (EINVAL);
-
-	/*
-	 * If TSC is P-state invariant and APERF/MPERF MSRs do not exist,
-	 * DELAY(9) based logic fails.
- */ - if (tsc_is_invariant && !tsc_perf_stat) - return (EOPNOTSUPP); - -#ifdef SMP - if (smp_cpus > 1) { - /* Schedule ourselves on the indicated cpu. */ - thread_lock(curthread); - sched_bind(curthread, cpu_id); - thread_unlock(curthread); - } -#endif - - /* Calibrate by measuring a short delay. */ - reg = intr_disable(); - if (tsc_is_invariant) { - wrmsr(MSR_MPERF, 0); - wrmsr(MSR_APERF, 0); - tsc1 = rdtsc(); - DELAY(1000); - mcnt = rdmsr(MSR_MPERF); - acnt = rdmsr(MSR_APERF); - tsc2 = rdtsc(); - intr_restore(reg); - perf = 1000 * acnt / mcnt; - *rate = (tsc2 - tsc1) * perf; - } else { - tsc1 = rdtsc(); - DELAY(1000); - tsc2 = rdtsc(); - intr_restore(reg); - *rate = (tsc2 - tsc1) * 1000; - } - -#ifdef SMP - if (smp_cpus > 1) { - thread_lock(curthread); - sched_unbind(curthread); - thread_unlock(curthread); - } -#endif - - return (0); -} - -/* - * Shutdown the CPU as much as possible - */ -void -cpu_halt(void) -{ - for (;;) - halt(); -} - -void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ -static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ -static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ -SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait, - 0, "Use MONITOR/MWAIT for short idle"); - -#define STATE_RUNNING 0x0 -#define STATE_MWAIT 0x1 -#define STATE_SLEEPING 0x2 - -static void -cpu_idle_acpi(sbintime_t sbt) -{ - int *state; - - state = (int *)PCPU_PTR(monitorbuf); - *state = STATE_SLEEPING; - - /* See comments in cpu_idle_hlt(). */ - disable_intr(); - if (sched_runnable()) - enable_intr(); - else if (cpu_idle_hook) - cpu_idle_hook(sbt); - else - __asm __volatile("sti; hlt"); - *state = STATE_RUNNING; -} - -static void -cpu_idle_hlt(sbintime_t sbt) -{ - int *state; - - state = (int *)PCPU_PTR(monitorbuf); - *state = STATE_SLEEPING; - - /* - * Since we may be in a critical section from cpu_idle(), if - * an interrupt fires during that critical section we may have - * a pending preemption. If the CPU halts, then that thread - * may not execute until a later interrupt awakens the CPU. - * To handle this race, check for a runnable thread after - * disabling interrupts and immediately return if one is - * found. Also, we must absolutely guarentee that hlt is - * the next instruction after sti. This ensures that any - * interrupt that fires after the call to disable_intr() will - * immediately awaken the CPU from hlt. Finally, please note - * that on x86 this works fine because of interrupts enabled only - * after the instruction following sti takes place, while IF is set - * to 1 immediately, allowing hlt instruction to acknowledge the - * interrupt. - */ - disable_intr(); - if (sched_runnable()) - enable_intr(); - else - __asm __volatile("sti; hlt"); - *state = STATE_RUNNING; -} - -static void -cpu_idle_mwait(sbintime_t sbt) -{ - int *state; - - state = (int *)PCPU_PTR(monitorbuf); - *state = STATE_MWAIT; - - /* See comments in cpu_idle_hlt(). 
*/ - disable_intr(); - if (sched_runnable()) { - enable_intr(); - *state = STATE_RUNNING; - return; - } - cpu_monitor(state, 0, 0); - if (*state == STATE_MWAIT) - __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); - else - enable_intr(); - *state = STATE_RUNNING; -} - -static void -cpu_idle_spin(sbintime_t sbt) -{ - int *state; - int i; - - state = (int *)PCPU_PTR(monitorbuf); - *state = STATE_RUNNING; - - /* - * The sched_runnable() call is racy but as long as there is - * a loop missing it one time will have just a little impact if any - * (and it is much better than missing the check at all). - */ - for (i = 0; i < 1000; i++) { - if (sched_runnable()) - return; - cpu_spinwait(); - } -} - -/* - * C1E renders the local APIC timer dead, so we disable it by - * reading the Interrupt Pending Message register and clearing - * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). - * - * Reference: - * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" - * #32559 revision 3.00+ - */ -#define MSR_AMDK8_IPM 0xc0010055 -#define AMDK8_SMIONCMPHALT (1ULL << 27) -#define AMDK8_C1EONCMPHALT (1ULL << 28) -#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) - -static void -cpu_probe_amdc1e(void) -{ - - /* - * Detect the presence of C1E capability mostly on latest - * dual-cores (or future) k8 family. - */ - if (cpu_vendor_id == CPU_VENDOR_AMD && - (cpu_id & 0x00000f00) == 0x00000f00 && - (cpu_id & 0x0fff0000) >= 0x00040000) { - cpu_ident_amdc1e = 1; - } -} - -void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; - -void -cpu_idle(int busy) -{ - uint64_t msr; - sbintime_t sbt = -1; - - CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", - busy, curcpu); -#ifdef MP_WATCHDOG - ap_watchdog(PCPU_GET(cpuid)); -#endif - /* If we are busy - try to use fast methods. */ - if (busy) { - if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { - cpu_idle_mwait(busy); - goto out; - } - } - - /* If we have time - switch timers into idle mode. */ - if (!busy) { - critical_enter(); - sbt = cpu_idleclock(); - } - - /* Apply AMD APIC timer C1E workaround. */ - if (cpu_ident_amdc1e && cpu_disable_c3_sleep) { - msr = rdmsr(MSR_AMDK8_IPM); - if (msr & AMDK8_CMPHALT) - wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); - } - - /* Call main idle method. */ - cpu_idle_fn(sbt); - - /* Switch timers back into active mode. */ - if (!busy) { - cpu_activeclock(); - critical_exit(); - } -out: - CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", - busy, curcpu); -} - -int -cpu_idle_wakeup(int cpu) -{ - struct pcpu *pcpu; - int *state; - - pcpu = pcpu_find(cpu); - state = (int *)pcpu->pc_monitorbuf; - /* - * This doesn't need to be atomic since missing the race will - * simply result in unnecessary IPIs. - */ - if (*state == STATE_SLEEPING) - return (0); - if (*state == STATE_MWAIT) - *state = STATE_RUNNING; - return (1); -} - -/* - * Ordered by speed/power consumption. - */ -struct { - void *id_fn; - char *id_name; -} idle_tbl[] = { - { cpu_idle_spin, "spin" }, - { cpu_idle_mwait, "mwait" }, - { cpu_idle_hlt, "hlt" }, - { cpu_idle_acpi, "acpi" }, - { NULL, NULL } -}; - -static int -idle_sysctl_available(SYSCTL_HANDLER_ARGS) -{ - char *avail, *p; - int error; - int i; - - avail = malloc(256, M_TEMP, M_WAITOK); - p = avail; - for (i = 0; idle_tbl[i].id_name != NULL; i++) { - if (strstr(idle_tbl[i].id_name, "mwait") && - (cpu_feature2 & CPUID2_MON) == 0) - continue; - if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && - cpu_idle_hook == NULL) - continue; - p += sprintf(p, "%s%s", p != avail ? 
", " : "", - idle_tbl[i].id_name); - } - error = sysctl_handle_string(oidp, avail, 0, req); - free(avail, M_TEMP); - return (error); -} - -SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, - 0, 0, idle_sysctl_available, "A", "list of available idle functions"); - -static int -idle_sysctl(SYSCTL_HANDLER_ARGS) -{ - char buf[16]; - int error; - char *p; - int i; - - p = "unknown"; - for (i = 0; idle_tbl[i].id_name != NULL; i++) { - if (idle_tbl[i].id_fn == cpu_idle_fn) { - p = idle_tbl[i].id_name; - break; - } - } - strncpy(buf, p, sizeof(buf)); - error = sysctl_handle_string(oidp, buf, sizeof(buf), req); - if (error != 0 || req->newptr == NULL) - return (error); - for (i = 0; idle_tbl[i].id_name != NULL; i++) { - if (strstr(idle_tbl[i].id_name, "mwait") && - (cpu_feature2 & CPUID2_MON) == 0) - continue; - if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && - cpu_idle_hook == NULL) - continue; - if (strcmp(idle_tbl[i].id_name, buf)) - continue; - cpu_idle_fn = idle_tbl[i].id_fn; - return (0); - } - return (EINVAL); -} - -SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, - idle_sysctl, "A", "currently selected idle function"); - /* * Reset registers to default values on exec. */ diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index ccde0e3..9083421 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -91,6 +91,7 @@ struct dumperinfo; void *alloc_fpusave(int flags); void amd64_syscall(struct thread *td, int traced); void busdma_swi(void); +void cpu_probe_amdc1e(void); void cpu_setregs(void); void doreti_iret(void) __asm(__STRING(doreti_iret)); void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault)); diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 4910903..ae71c39 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -558,6 +558,7 @@ x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard +x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/identcpu.c standard diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 1873514..f072247 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -576,6 +576,7 @@ x86/pci/pci_bus.c optional pci x86/pci/qpi.c optional pci x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard +x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/fdt_machdep.c optional fdt x86/x86/identcpu.c standard diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98 index be67ce4..f95d0bb 100644 --- a/sys/conf/files.pc98 +++ b/sys/conf/files.pc98 @@ -248,6 +248,7 @@ x86/isa/isa.c optional isa x86/pci/pci_bus.c optional pci x86/x86/busdma_bounce.c standard x86/x86/busdma_machdep.c standard +x86/x86/cpu_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/identcpu.c standard x86/x86/intr_machdep.c standard diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 123db4e..72f7685 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1176,427 +1176,6 @@ sys_sigreturn(td, uap) } /* - * Machine dependent boot() routine - * - * I haven't seen anything to put here yet - * Possibly some stuff might be grafted back here from boot() - */ -void -cpu_boot(int howto) -{ -} - -/* - * Flush the D-cache for non-DMA I/O so that the I-cache can - * be made coherent later. 
- */ -void -cpu_flush_dcache(void *ptr, size_t len) -{ - /* Not applicable */ -} - -/* Get current clock frequency for the given cpu id. */ -int -cpu_est_clockrate(int cpu_id, uint64_t *rate) -{ - uint64_t tsc1, tsc2; - uint64_t acnt, mcnt, perf; - register_t reg; - - if (pcpu_find(cpu_id) == NULL || rate == NULL) - return (EINVAL); - if ((cpu_feature & CPUID_TSC) == 0) - return (EOPNOTSUPP); - - /* - * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, - * DELAY(9) based logic fails. - */ - if (tsc_is_invariant && !tsc_perf_stat) - return (EOPNOTSUPP); - -#ifdef SMP - if (smp_cpus > 1) { - /* Schedule ourselves on the indicated cpu. */ - thread_lock(curthread); - sched_bind(curthread, cpu_id); - thread_unlock(curthread); - } -#endif - - /* Calibrate by measuring a short delay. */ - reg = intr_disable(); - if (tsc_is_invariant) { - wrmsr(MSR_MPERF, 0); - wrmsr(MSR_APERF, 0); - tsc1 = rdtsc(); - DELAY(1000); - mcnt = rdmsr(MSR_MPERF); - acnt = rdmsr(MSR_APERF); - tsc2 = rdtsc(); - intr_restore(reg); - perf = 1000 * acnt / mcnt; - *rate = (tsc2 - tsc1) * perf; - } else { - tsc1 = rdtsc(); - DELAY(1000); - tsc2 = rdtsc(); - intr_restore(reg); - *rate = (tsc2 - tsc1) * 1000; - } - -#ifdef SMP - if (smp_cpus > 1) { - thread_lock(curthread); - sched_unbind(curthread); - thread_unlock(curthread); - } -#endif - - return (0); -} - -#ifdef XEN - -static void -idle_block(void) -{ - - HYPERVISOR_sched_op(SCHEDOP_block, 0); -} - -void -cpu_halt(void) -{ - HYPERVISOR_shutdown(SHUTDOWN_poweroff); -} - -int scheduler_running; - -static void -cpu_idle_hlt(sbintime_t sbt) -{ - - scheduler_running = 1; - enable_intr(); - idle_block(); -} - -#else -/* - * Shutdown the CPU as much as possible - */ -void -cpu_halt(void) -{ - for (;;) - halt(); -} - -#endif - -void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ -static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ -static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ -SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait, - 0, "Use MONITOR/MWAIT for short idle"); - -#define STATE_RUNNING 0x0 -#define STATE_MWAIT 0x1 -#define STATE_SLEEPING 0x2 - -#ifndef PC98 -static void -cpu_idle_acpi(sbintime_t sbt) -{ - int *state; - - state = (int *)PCPU_PTR(monitorbuf); - *state = STATE_SLEEPING; - - /* See comments in cpu_idle_hlt(). */ - disable_intr(); - if (sched_runnable()) - enable_intr(); - else if (cpu_idle_hook) - cpu_idle_hook(sbt); - else - __asm __volatile("sti; hlt"); - *state = STATE_RUNNING; -} -#endif /* !PC98 */ - -#ifndef XEN -static void -cpu_idle_hlt(sbintime_t sbt) -{ - int *state; - - state = (int *)PCPU_PTR(monitorbuf); - *state = STATE_SLEEPING; - - /* - * Since we may be in a critical section from cpu_idle(), if - * an interrupt fires during that critical section we may have - * a pending preemption. If the CPU halts, then that thread - * may not execute until a later interrupt awakens the CPU. - * To handle this race, check for a runnable thread after - * disabling interrupts and immediately return if one is - * found. Also, we must absolutely guarentee that hlt is - * the next instruction after sti. This ensures that any - * interrupt that fires after the call to disable_intr() will - * immediately awaken the CPU from hlt. Finally, please note - * that on x86 this works fine because of interrupts enabled only - * after the instruction following sti takes place, while IF is set - * to 1 immediately, allowing hlt instruction to acknowledge the - * interrupt. 
- */ - disable_intr(); - if (sched_runnable()) - enable_intr(); - else - __asm __volatile("sti; hlt"); - *state = STATE_RUNNING; -} -#endif - -static void -cpu_idle_mwait(sbintime_t sbt) -{ - int *state; - - state = (int *)PCPU_PTR(monitorbuf); - *state = STATE_MWAIT; - - /* See comments in cpu_idle_hlt(). */ - disable_intr(); - if (sched_runnable()) { - enable_intr(); - *state = STATE_RUNNING; - return; - } - cpu_monitor(state, 0, 0); - if (*state == STATE_MWAIT) - __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); - else - enable_intr(); - *state = STATE_RUNNING; -} - -static void -cpu_idle_spin(sbintime_t sbt) -{ - int *state; - int i; - - state = (int *)PCPU_PTR(monitorbuf); - *state = STATE_RUNNING; - - /* - * The sched_runnable() call is racy but as long as there is - * a loop missing it one time will have just a little impact if any - * (and it is much better than missing the check at all). - */ - for (i = 0; i < 1000; i++) { - if (sched_runnable()) - return; - cpu_spinwait(); - } -} - -/* - * C1E renders the local APIC timer dead, so we disable it by - * reading the Interrupt Pending Message register and clearing - * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). - * - * Reference: - * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" - * #32559 revision 3.00+ - */ -#define MSR_AMDK8_IPM 0xc0010055 -#define AMDK8_SMIONCMPHALT (1ULL << 27) -#define AMDK8_C1EONCMPHALT (1ULL << 28) -#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) - -static void -cpu_probe_amdc1e(void) -{ - - /* - * Detect the presence of C1E capability mostly on latest - * dual-cores (or future) k8 family. - */ - if (cpu_vendor_id == CPU_VENDOR_AMD && - (cpu_id & 0x00000f00) == 0x00000f00 && - (cpu_id & 0x0fff0000) >= 0x00040000) { - cpu_ident_amdc1e = 1; - } -} - -#if defined(PC98) || defined(XEN) -void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt; -#else -void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; -#endif - -void -cpu_idle(int busy) -{ -#ifndef XEN - uint64_t msr; -#endif - sbintime_t sbt = -1; - - CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", - busy, curcpu); -#if defined(MP_WATCHDOG) && !defined(XEN) - ap_watchdog(PCPU_GET(cpuid)); -#endif -#ifndef XEN - /* If we are busy - try to use fast methods. */ - if (busy) { - if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { - cpu_idle_mwait(busy); - goto out; - } - } -#endif - - /* If we have time - switch timers into idle mode. */ - if (!busy) { - critical_enter(); - sbt = cpu_idleclock(); - } - -#ifndef XEN - /* Apply AMD APIC timer C1E workaround. */ - if (cpu_ident_amdc1e && cpu_disable_c3_sleep) { - msr = rdmsr(MSR_AMDK8_IPM); - if (msr & AMDK8_CMPHALT) - wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); - } -#endif - - /* Call main idle method. */ - cpu_idle_fn(sbt); - - /* Switch timers back into active mode. */ - if (!busy) { - cpu_activeclock(); - critical_exit(); - } -#ifndef XEN -out: -#endif - CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", - busy, curcpu); -} - -int -cpu_idle_wakeup(int cpu) -{ - struct pcpu *pcpu; - int *state; - - pcpu = pcpu_find(cpu); - state = (int *)pcpu->pc_monitorbuf; - /* - * This doesn't need to be atomic since missing the race will - * simply result in unnecessary IPIs. - */ - if (*state == STATE_SLEEPING) - return (0); - if (*state == STATE_MWAIT) - *state = STATE_RUNNING; - return (1); -} - -/* - * Ordered by speed/power consumption. 
- */ -struct { - void *id_fn; - char *id_name; -} idle_tbl[] = { - { cpu_idle_spin, "spin" }, - { cpu_idle_mwait, "mwait" }, - { cpu_idle_hlt, "hlt" }, -#ifndef PC98 - { cpu_idle_acpi, "acpi" }, -#endif - { NULL, NULL } -}; - -static int -idle_sysctl_available(SYSCTL_HANDLER_ARGS) -{ - char *avail, *p; - int error; - int i; - - avail = malloc(256, M_TEMP, M_WAITOK); - p = avail; - for (i = 0; idle_tbl[i].id_name != NULL; i++) { - if (strstr(idle_tbl[i].id_name, "mwait") && - (cpu_feature2 & CPUID2_MON) == 0) - continue; -#ifndef PC98 - if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && - cpu_idle_hook == NULL) - continue; -#endif - p += sprintf(p, "%s%s", p != avail ? ", " : "", - idle_tbl[i].id_name); - } - error = sysctl_handle_string(oidp, avail, 0, req); - free(avail, M_TEMP); - return (error); -} - -SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, - 0, 0, idle_sysctl_available, "A", "list of available idle functions"); - -static int -idle_sysctl(SYSCTL_HANDLER_ARGS) -{ - char buf[16]; - int error; - char *p; - int i; - - p = "unknown"; - for (i = 0; idle_tbl[i].id_name != NULL; i++) { - if (idle_tbl[i].id_fn == cpu_idle_fn) { - p = idle_tbl[i].id_name; - break; - } - } - strncpy(buf, p, sizeof(buf)); - error = sysctl_handle_string(oidp, buf, sizeof(buf), req); - if (error != 0 || req->newptr == NULL) - return (error); - for (i = 0; idle_tbl[i].id_name != NULL; i++) { - if (strstr(idle_tbl[i].id_name, "mwait") && - (cpu_feature2 & CPUID2_MON) == 0) - continue; -#ifndef PC98 - if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && - cpu_idle_hook == NULL) - continue; -#endif - if (strcmp(idle_tbl[i].id_name, buf)) - continue; - cpu_idle_fn = idle_tbl[i].id_fn; - return (0); - } - return (EINVAL); -} - -SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, - idle_sysctl, "A", "currently selected idle function"); - -/* * Reset registers to default values on exec. */ void diff --git a/sys/i386/include/md_var.h b/sys/i386/include/md_var.h index 339dff3..bffdd57 100644 --- a/sys/i386/include/md_var.h +++ b/sys/i386/include/md_var.h @@ -97,6 +97,7 @@ struct dumperinfo; void *alloc_fpusave(int flags); void bcopyb(const void *from, void *to, size_t len); void busdma_swi(void); +void cpu_probe_amdc1e(void); void cpu_setregs(void); void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs)); void doreti_iret(void) __asm(__STRING(doreti_iret)); diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c new file mode 100644 index 0000000..846a123 --- /dev/null +++ b/sys/x86/x86/cpu_machdep.c @@ -0,0 +1,533 @@ +/*- + * Copyright (c) 2003 Peter Wemm. + * Copyright (c) 1992 Terrence R. Lambert. + * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_atpic.h" +#include "opt_compat.h" +#include "opt_cpu.h" +#include "opt_ddb.h" +#include "opt_inet.h" +#include "opt_isa.h" +#include "opt_kstack_pages.h" +#include "opt_maxmem.h" +#include "opt_mp_watchdog.h" +#include "opt_perfmon.h" +#include "opt_platform.h" +#ifdef __i386__ +#include "opt_npx.h" +#include "opt_apic.h" +#include "opt_xbox.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef SMP +#include +#endif +#include + +#include +#include +#include +#include +#include +#include +#ifdef PERFMON +#include +#endif +#include +#ifdef SMP +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef XEN +/* XEN includes */ +#include +#include +#include +#include +#include +#endif + +/* + * Machine dependent boot() routine + * + * I haven't seen anything to put here yet + * Possibly some stuff might be grafted back here from boot() + */ +void +cpu_boot(int howto) +{ +} + +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* Not applicable */ +} + +/* Get current clock frequency for the given cpu id. */ +int +cpu_est_clockrate(int cpu_id, uint64_t *rate) +{ + uint64_t tsc1, tsc2; + uint64_t acnt, mcnt, perf; + register_t reg; + + if (pcpu_find(cpu_id) == NULL || rate == NULL) + return (EINVAL); +#ifdef __i386__ + if ((cpu_feature & CPUID_TSC) == 0) + return (EOPNOTSUPP); +#endif + + /* + * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, + * DELAY(9) based logic fails. + */ + if (tsc_is_invariant && !tsc_perf_stat) + return (EOPNOTSUPP); + +#ifdef SMP + if (smp_cpus > 1) { + /* Schedule ourselves on the indicated cpu. */ + thread_lock(curthread); + sched_bind(curthread, cpu_id); + thread_unlock(curthread); + } +#endif + + /* Calibrate by measuring a short delay. 
*/ + reg = intr_disable(); + if (tsc_is_invariant) { + wrmsr(MSR_MPERF, 0); + wrmsr(MSR_APERF, 0); + tsc1 = rdtsc(); + DELAY(1000); + mcnt = rdmsr(MSR_MPERF); + acnt = rdmsr(MSR_APERF); + tsc2 = rdtsc(); + intr_restore(reg); + perf = 1000 * acnt / mcnt; + *rate = (tsc2 - tsc1) * perf; + } else { + tsc1 = rdtsc(); + DELAY(1000); + tsc2 = rdtsc(); + intr_restore(reg); + *rate = (tsc2 - tsc1) * 1000; + } + +#ifdef SMP + if (smp_cpus > 1) { + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); + } +#endif + + return (0); +} + +#if defined(__i386__) && defined(XEN) + +static void +idle_block(void) +{ + + HYPERVISOR_sched_op(SCHEDOP_block, 0); +} + +void +cpu_halt(void) +{ + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +} + +int scheduler_running; + +static void +cpu_idle_hlt(sbintime_t sbt) +{ + + scheduler_running = 1; + enable_intr(); + idle_block(); +} + +#else +/* + * Shutdown the CPU as much as possible + */ +void +cpu_halt(void) +{ + for (;;) + halt(); +} + +#endif + +void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ +static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ +static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ +SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait, + 0, "Use MONITOR/MWAIT for short idle"); + +#define STATE_RUNNING 0x0 +#define STATE_MWAIT 0x1 +#define STATE_SLEEPING 0x2 + +#ifndef PC98 +static void +cpu_idle_acpi(sbintime_t sbt) +{ + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_SLEEPING; + + /* See comments in cpu_idle_hlt(). */ + disable_intr(); + if (sched_runnable()) + enable_intr(); + else if (cpu_idle_hook) + cpu_idle_hook(sbt); + else + __asm __volatile("sti; hlt"); + *state = STATE_RUNNING; +} +#endif /* !PC98 */ + +#if !defined(__i386__) || !defined(XEN) +static void +cpu_idle_hlt(sbintime_t sbt) +{ + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_SLEEPING; + + /* + * Since we may be in a critical section from cpu_idle(), if + * an interrupt fires during that critical section we may have + * a pending preemption. If the CPU halts, then that thread + * may not execute until a later interrupt awakens the CPU. + * To handle this race, check for a runnable thread after + * disabling interrupts and immediately return if one is + * found. Also, we must absolutely guarentee that hlt is + * the next instruction after sti. This ensures that any + * interrupt that fires after the call to disable_intr() will + * immediately awaken the CPU from hlt. Finally, please note + * that on x86 this works fine because of interrupts enabled only + * after the instruction following sti takes place, while IF is set + * to 1 immediately, allowing hlt instruction to acknowledge the + * interrupt. + */ + disable_intr(); + if (sched_runnable()) + enable_intr(); + else + __asm __volatile("sti; hlt"); + *state = STATE_RUNNING; +} +#endif + +static void +cpu_idle_mwait(sbintime_t sbt) +{ + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_MWAIT; + + /* See comments in cpu_idle_hlt(). 
*/ + disable_intr(); + if (sched_runnable()) { + enable_intr(); + *state = STATE_RUNNING; + return; + } + cpu_monitor(state, 0, 0); + if (*state == STATE_MWAIT) + __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); + else + enable_intr(); + *state = STATE_RUNNING; +} + +static void +cpu_idle_spin(sbintime_t sbt) +{ + int *state; + int i; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_RUNNING; + + /* + * The sched_runnable() call is racy but as long as there is + * a loop missing it one time will have just a little impact if any + * (and it is much better than missing the check at all). + */ + for (i = 0; i < 1000; i++) { + if (sched_runnable()) + return; + cpu_spinwait(); + } +} + +/* + * C1E renders the local APIC timer dead, so we disable it by + * reading the Interrupt Pending Message register and clearing + * both C1eOnCmpHalt (bit 28) and SmiOnCmpHalt (bit 27). + * + * Reference: + * "BIOS and Kernel Developer's Guide for AMD NPT Family 0Fh Processors" + * #32559 revision 3.00+ + */ +#define MSR_AMDK8_IPM 0xc0010055 +#define AMDK8_SMIONCMPHALT (1ULL << 27) +#define AMDK8_C1EONCMPHALT (1ULL << 28) +#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) + +void +cpu_probe_amdc1e(void) +{ + + /* + * Detect the presence of C1E capability mostly on latest + * dual-cores (or future) k8 family. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && + (cpu_id & 0x00000f00) == 0x00000f00 && + (cpu_id & 0x0fff0000) >= 0x00040000) { + cpu_ident_amdc1e = 1; + } +} + +#if defined(__i386__) && (defined(PC98) || defined(XEN)) +void (*cpu_idle_fn)(sbintime_t) = cpu_idle_hlt; +#else +void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; +#endif + +void +cpu_idle(int busy) +{ +#if !defined(__i386__) || !defined(XEN) + uint64_t msr; +#endif + sbintime_t sbt = -1; + + CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", + busy, curcpu); +#if defined(MP_WATCHDOG) && (!defined(__i386__) || !defined(XEN)) + ap_watchdog(PCPU_GET(cpuid)); +#endif +#if !defined(__i386__) || !defined(XEN) + /* If we are busy - try to use fast methods. */ + if (busy) { + if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { + cpu_idle_mwait(busy); + goto out; + } + } +#endif + + /* If we have time - switch timers into idle mode. */ + if (!busy) { + critical_enter(); + sbt = cpu_idleclock(); + } + +#if !defined(__i386__) || !defined(XEN) + /* Apply AMD APIC timer C1E workaround. */ + if (cpu_ident_amdc1e && cpu_disable_c3_sleep) { + msr = rdmsr(MSR_AMDK8_IPM); + if (msr & AMDK8_CMPHALT) + wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); + } +#endif + + /* Call main idle method. */ + cpu_idle_fn(sbt); + + /* Switch timers back into active mode. */ + if (!busy) { + cpu_activeclock(); + critical_exit(); + } +#if !defined(__i386__) || !defined(XEN) +out: +#endif + CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", + busy, curcpu); +} + +int +cpu_idle_wakeup(int cpu) +{ + struct pcpu *pcpu; + int *state; + + pcpu = pcpu_find(cpu); + state = (int *)pcpu->pc_monitorbuf; + /* + * This doesn't need to be atomic since missing the race will + * simply result in unnecessary IPIs. + */ + if (*state == STATE_SLEEPING) + return (0); + if (*state == STATE_MWAIT) + *state = STATE_RUNNING; + return (1); +} + +/* + * Ordered by speed/power consumption. 
+ */ +struct { + void *id_fn; + char *id_name; +} idle_tbl[] = { + { cpu_idle_spin, "spin" }, + { cpu_idle_mwait, "mwait" }, + { cpu_idle_hlt, "hlt" }, +#if !defined(__i386__) || !defined(PC98) + { cpu_idle_acpi, "acpi" }, +#endif + { NULL, NULL } +}; + +static int +idle_sysctl_available(SYSCTL_HANDLER_ARGS) +{ + char *avail, *p; + int error; + int i; + + avail = malloc(256, M_TEMP, M_WAITOK); + p = avail; + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (strstr(idle_tbl[i].id_name, "mwait") && + (cpu_feature2 & CPUID2_MON) == 0) + continue; +#if !defined(__i386__) || !defined(PC98) + if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && + cpu_idle_hook == NULL) + continue; +#endif + p += sprintf(p, "%s%s", p != avail ? ", " : "", + idle_tbl[i].id_name); + } + error = sysctl_handle_string(oidp, avail, 0, req); + free(avail, M_TEMP); + return (error); +} + +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, + 0, 0, idle_sysctl_available, "A", "list of available idle functions"); + +static int +idle_sysctl(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + int error; + char *p; + int i; + + p = "unknown"; + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (idle_tbl[i].id_fn == cpu_idle_fn) { + p = idle_tbl[i].id_name; + break; + } + } + strncpy(buf, p, sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return (error); + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (strstr(idle_tbl[i].id_name, "mwait") && + (cpu_feature2 & CPUID2_MON) == 0) + continue; +#if !defined(__i386__) || !defined(PC98) + if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && + cpu_idle_hook == NULL) + continue; +#endif + if (strcmp(idle_tbl[i].id_name, buf)) + continue; + cpu_idle_fn = idle_tbl[i].id_fn; + return (0); + } + return (EINVAL); +} + +SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, + idle_sysctl, "A", "currently selected idle function");
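
For reference (not part of the patch itself): the machdep.idle and
machdep.idle_available sysctls defined by the moved code can be exercised
from userland through sysctlbyname(3).  Below is a minimal sketch; the
idlectl.c name and its optional argument are only for illustration.

/* idlectl.c: list the available idle methods and optionally select one. */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <string.h>

int
main(int argc, char **argv)
{
	char avail[256], cur[16];
	size_t len;

	/* E.g. "spin, mwait, hlt, acpi", depending on CPU features. */
	len = sizeof(avail);
	if (sysctlbyname("machdep.idle_available", avail, &len, NULL, 0) == -1)
		err(1, "machdep.idle_available");
	printf("available: %s\n", avail);

	/* Name of the currently selected cpu_idle_fn. */
	len = sizeof(cur);
	if (sysctlbyname("machdep.idle", cur, &len, NULL, 0) == -1)
		err(1, "machdep.idle");
	printf("current:   %s\n", cur);

	/* Optionally switch the idle method (needs root), e.g. "idlectl hlt". */
	if (argc > 1 && sysctlbyname("machdep.idle", NULL, NULL,
	    argv[1], strlen(argv[1]) + 1) == -1)
		err(1, "set machdep.idle=%s", argv[1]);
	return (0);
}

The same can be done with sysctl(8), e.g. "sysctl machdep.idle=hlt"; the
written value must be one of the names listed by machdep.idle_available,
otherwise the idle_sysctl() handler returns EINVAL.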