From owner-svn-src-all@FreeBSD.ORG Fri Jul 20 19:35:20 2012
Message-Id: <201207201935.q6KJZK16029661@svn.freebsd.org>
From: Alexander Motin
Date: Fri, 20 Jul 2012 19:35:20 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
        svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject: svn commit: r238658 - in stable/8/sys: amd64/amd64 i386/i386 pc98/pc98

Author: mav
Date: Fri Jul 20 19:35:20 2012
New Revision: 238658
URL: http://svn.freebsd.org/changeset/base/238658

Log:
  Partially MFC r212541:
  Refactor cpu_idle() on x86.

  Use MONITOR/MWAIT instructions (if supported) under a high sleep/wakeup
  rate as a fast alternative to the other methods.  This lets the SMP
  scheduler wake sleeping CPUs much faster, without using an IPI,
  significantly increasing performance on some highly task-switching
  loads.  On such loads it also hides the two ACPI timer reads otherwise
  done by acpi_cpu_idle(), which are reported to be slow on some systems.

  MFC r225936 (by attilio):
  Add some improvements in the idle table callbacks:
  - Replace instances of the manual assembly instruction "hlt" with
    calls to the halt() function.
  - In cpu_idle_mwait(), avoid races in the check of sched_runnable()
    by using the same pattern used in cpu_idle_hlt() with the 'hlt'
    instruction.
  - Add comments explaining the logic behind the pattern used in
    cpu_idle_hlt() and other idle callbacks.

  PR:           kern/170021

Modified:
  stable/8/sys/amd64/amd64/machdep.c
  stable/8/sys/i386/i386/machdep.c
  stable/8/sys/pc98/pc98/machdep.c

Directory Properties:
  stable/8/sys/   (props changed)

Modified: stable/8/sys/amd64/amd64/machdep.c
==============================================================================
--- stable/8/sys/amd64/amd64/machdep.c  Fri Jul 20 17:51:20 2012  (r238657)
+++ stable/8/sys/amd64/amd64/machdep.c  Fri Jul 20 19:35:20 2012  (r238658)
@@ -629,63 +629,122 @@ void
 cpu_halt(void)
 {
        for (;;)
-               __asm__ ("hlt");
+               halt();
 }
 
 void (*cpu_idle_hook)(void) = NULL;    /* ACPI idle hook. */
+static int     cpu_ident_amdc1e = 0;   /* AMD C1E supported. */
+static int     idle_mwait = 1;         /* Use MONITOR/MWAIT for short idle.
*/ +TUNABLE_INT("machdep.idle_mwait", &idle_mwait); +SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, + 0, "Use MONITOR/MWAIT for short idle"); + +#define STATE_RUNNING 0x0 +#define STATE_MWAIT 0x1 +#define STATE_SLEEPING 0x2 + +static void +cpu_idle_acpi(int busy) +{ + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_SLEEPING; + + /* See comments in cpu_idle_hlt(). */ + disable_intr(); + if (sched_runnable()) + enable_intr(); + else if (cpu_idle_hook) + cpu_idle_hook(); + else + __asm __volatile("sti; hlt"); + *state = STATE_RUNNING; +} static void cpu_idle_hlt(int busy) { + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_SLEEPING; + /* - * we must absolutely guarentee that hlt is the next instruction - * after sti or we introduce a timing window. + * Since we may be in a critical section from cpu_idle(), if + * an interrupt fires during that critical section we may have + * a pending preemption. If the CPU halts, then that thread + * may not execute until a later interrupt awakens the CPU. + * To handle this race, check for a runnable thread after + * disabling interrupts and immediately return if one is + * found. Also, we must absolutely guarentee that hlt is + * the next instruction after sti. This ensures that any + * interrupt that fires after the call to disable_intr() will + * immediately awaken the CPU from hlt. Finally, please note + * that on x86 this works fine because of interrupts enabled only + * after the instruction following sti takes place, while IF is set + * to 1 immediately, allowing hlt instruction to acknowledge the + * interrupt. */ disable_intr(); - if (sched_runnable()) + if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); + *state = STATE_RUNNING; } +/* + * MWAIT cpu power states. Lower 4 bits are sub-states. + */ +#define MWAIT_C0 0xf0 +#define MWAIT_C1 0x00 +#define MWAIT_C2 0x10 +#define MWAIT_C3 0x20 +#define MWAIT_C4 0x30 + static void -cpu_idle_acpi(int busy) +cpu_idle_mwait(int busy) { + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_MWAIT; + + /* See comments in cpu_idle_hlt(). */ disable_intr(); - if (sched_runnable()) + if (sched_runnable()) { enable_intr(); - else if (cpu_idle_hook) - cpu_idle_hook(); + *state = STATE_RUNNING; + return; + } + cpu_monitor(state, 0, 0); + if (*state == STATE_MWAIT) + __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else - __asm __volatile("sti; hlt"); + enable_intr(); + *state = STATE_RUNNING; } -static int cpu_ident_amdc1e = 0; - -static int -cpu_probe_amdc1e(void) +static void +cpu_idle_spin(int busy) { + int *state; int i; - /* - * Forget it, if we're not using local APIC timer. - */ - if (resource_disabled("apic", 0) || - (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0)) - return (0); + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_RUNNING; /* - * Detect the presence of C1E capability mostly on latest - * dual-cores (or future) k8 family. - */ - if (cpu_vendor_id == CPU_VENDOR_AMD && - (cpu_id & 0x00000f00) == 0x00000f00 && - (cpu_id & 0x0fff0000) >= 0x00040000) { - cpu_ident_amdc1e = 1; - return (1); + * The sched_runnable() call is racy but as long as there is + * a loop missing it one time will have just a little impact if any + * (and it is much better than missing the check at all). 
+ */ + for (i = 0; i < 1000; i++) { + if (sched_runnable()) + return; + cpu_spinwait(); } - - return (0); } /* @@ -703,110 +762,66 @@ cpu_probe_amdc1e(void) #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) static void -cpu_idle_amdc1e(int busy) +cpu_probe_amdc1e(void) { - disable_intr(); - if (sched_runnable()) - enable_intr(); - else { - uint64_t msr; - - msr = rdmsr(MSR_AMDK8_IPM); - if (msr & AMDK8_CMPHALT) - wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); - - if (cpu_idle_hook) - cpu_idle_hook(); - else - __asm __volatile("sti; hlt"); + /* + * Detect the presence of C1E capability mostly on latest + * dual-cores (or future) k8 family. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && + (cpu_id & 0x00000f00) == 0x00000f00 && + (cpu_id & 0x0fff0000) >= 0x00040000) { + cpu_ident_amdc1e = 1; } } -static void -cpu_idle_spin(int busy) -{ - return; -} - void (*cpu_idle_fn)(int) = cpu_idle_acpi; void cpu_idle(int busy) { + uint64_t msr; + #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif - cpu_idle_fn(busy); -} - -/* - * mwait cpu power states. Lower 4 bits are sub-states. - */ -#define MWAIT_C0 0xf0 -#define MWAIT_C1 0x00 -#define MWAIT_C2 0x10 -#define MWAIT_C3 0x20 -#define MWAIT_C4 0x30 - -#define MWAIT_DISABLED 0x0 -#define MWAIT_WOKEN 0x1 -#define MWAIT_WAITING 0x2 - -static void -cpu_idle_mwait(int busy) -{ - int *mwait; - - mwait = (int *)PCPU_PTR(monitorbuf); - *mwait = MWAIT_WAITING; - if (sched_runnable()) - return; - cpu_monitor(mwait, 0, 0); - if (*mwait == MWAIT_WAITING) - cpu_mwait(0, MWAIT_C1); -} - -static void -cpu_idle_mwait_hlt(int busy) -{ - int *mwait; + /* If we are busy - try to use fast methods. */ + if (busy) { + if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { + cpu_idle_mwait(busy); + return; + } + } - mwait = (int *)PCPU_PTR(monitorbuf); - if (busy == 0) { - *mwait = MWAIT_DISABLED; - cpu_idle_hlt(busy); - return; + /* Apply AMD APIC timer C1E workaround. */ + if (cpu_ident_amdc1e) { + msr = rdmsr(MSR_AMDK8_IPM); + if (msr & AMDK8_CMPHALT) + wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } - *mwait = MWAIT_WAITING; - if (sched_runnable()) - return; - cpu_monitor(mwait, 0, 0); - if (*mwait == MWAIT_WAITING) - cpu_mwait(0, MWAIT_C1); + + /* Call main idle method. */ + cpu_idle_fn(busy); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; - int *mwait; + int *state; - if (cpu_idle_fn == cpu_idle_spin) - return (1); - if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt) - return (0); pcpu = pcpu_find(cpu); - mwait = (int *)pcpu->pc_monitorbuf; + state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. 
*/ - if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED) + if (*state == STATE_SLEEPING) return (0); - *mwait = MWAIT_WOKEN; - + if (*state == STATE_MWAIT) + *state = STATE_RUNNING; return (1); } @@ -819,8 +834,6 @@ struct { } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, - { cpu_idle_mwait_hlt, "mwait_hlt" }, - { cpu_idle_amdc1e, "amdc1e" }, { cpu_idle_hlt, "hlt" }, { cpu_idle_acpi, "acpi" }, { NULL, NULL } @@ -839,8 +852,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; - if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 && - cpu_ident_amdc1e == 0) + if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && + cpu_idle_hook == NULL) continue; p += sprintf(p, "%s, ", idle_tbl[i].id_name); } @@ -849,6 +862,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG return (error); } +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, + 0, 0, idle_sysctl_available, "A", "list of available idle functions"); + static int idle_sysctl(SYSCTL_HANDLER_ARGS) { @@ -872,8 +888,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS) if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; - if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 && - cpu_ident_amdc1e == 0) + if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && + cpu_idle_hook == NULL) continue; if (strcmp(idle_tbl[i].id_name, buf)) continue; @@ -883,9 +899,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS) return (EINVAL); } -SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, - 0, 0, idle_sysctl_available, "A", "list of available idle functions"); - SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); @@ -1819,8 +1832,7 @@ hammer_time(u_int64_t modulep, u_int64_t } #endif - if (cpu_probe_amdc1e()) - cpu_idle_fn = cpu_idle_amdc1e; + cpu_probe_amdc1e(); /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); Modified: stable/8/sys/i386/i386/machdep.c ============================================================================== --- stable/8/sys/i386/i386/machdep.c Fri Jul 20 17:51:20 2012 (r238657) +++ stable/8/sys/i386/i386/machdep.c Fri Jul 20 19:35:20 2012 (r238658) @@ -1177,9 +1177,6 @@ cpu_est_clockrate(int cpu_id, uint64_t * return (0); } - -void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ - #ifdef XEN void @@ -1207,66 +1204,127 @@ void cpu_halt(void) { for (;;) - __asm__ ("hlt"); + halt(); } +#endif + +void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ +static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */ +static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ +TUNABLE_INT("machdep.idle_mwait", &idle_mwait); +SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, + 0, "Use MONITOR/MWAIT for short idle"); + +#define STATE_RUNNING 0x0 +#define STATE_MWAIT 0x1 +#define STATE_SLEEPING 0x2 + +static void +cpu_idle_acpi(int busy) +{ + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_SLEEPING; + + /* See comments in cpu_idle_hlt(). 
*/ + disable_intr(); + if (sched_runnable()) + enable_intr(); + else if (cpu_idle_hook) + cpu_idle_hook(); + else + __asm __volatile("sti; hlt"); + *state = STATE_RUNNING; +} + +#ifndef XEN static void cpu_idle_hlt(int busy) { + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_SLEEPING; + /* - * we must absolutely guarentee that hlt is the next instruction - * after sti or we introduce a timing window. + * Since we may be in a critical section from cpu_idle(), if + * an interrupt fires during that critical section we may have + * a pending preemption. If the CPU halts, then that thread + * may not execute until a later interrupt awakens the CPU. + * To handle this race, check for a runnable thread after + * disabling interrupts and immediately return if one is + * found. Also, we must absolutely guarentee that hlt is + * the next instruction after sti. This ensures that any + * interrupt that fires after the call to disable_intr() will + * immediately awaken the CPU from hlt. Finally, please note + * that on x86 this works fine because of interrupts enabled only + * after the instruction following sti takes place, while IF is set + * to 1 immediately, allowing hlt instruction to acknowledge the + * interrupt. */ disable_intr(); - if (sched_runnable()) + if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); + *state = STATE_RUNNING; } #endif +/* + * MWAIT cpu power states. Lower 4 bits are sub-states. + */ +#define MWAIT_C0 0xf0 +#define MWAIT_C1 0x00 +#define MWAIT_C2 0x10 +#define MWAIT_C3 0x20 +#define MWAIT_C4 0x30 + static void -cpu_idle_acpi(int busy) +cpu_idle_mwait(int busy) { + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_MWAIT; + + /* See comments in cpu_idle_hlt(). */ disable_intr(); - if (sched_runnable()) + if (sched_runnable()) { enable_intr(); - else if (cpu_idle_hook) - cpu_idle_hook(); + *state = STATE_RUNNING; + return; + } + cpu_monitor(state, 0, 0); + if (*state == STATE_MWAIT) + __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else - __asm __volatile("sti; hlt"); + enable_intr(); + *state = STATE_RUNNING; } -static int cpu_ident_amdc1e = 0; - -#if !defined(XEN) || defined(XEN_PRIVILEGED) -static int -cpu_probe_amdc1e(void) -{ -#ifdef DEV_APIC +static void +cpu_idle_spin(int busy) +{ + int *state; int i; - /* - * Forget it, if we're not using local APIC timer. - */ - if (resource_disabled("apic", 0) || - (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0)) - return (0); + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_RUNNING; /* - * Detect the presence of C1E capability mostly on latest - * dual-cores (or future) k8 family. - */ - if (cpu_vendor_id == CPU_VENDOR_AMD && - (cpu_id & 0x00000f00) == 0x00000f00 && - (cpu_id & 0x0fff0000) >= 0x00040000) { - cpu_ident_amdc1e = 1; - return (1); + * The sched_runnable() call is racy but as long as there is + * a loop missing it one time will have just a little impact if any + * (and it is much better than missing the check at all). 
+ */ + for (i = 0; i < 1000; i++) { + if (sched_runnable()) + return; + cpu_spinwait(); } -#endif - return (0); } -#endif /* * C1E renders the local APIC timer dead, so we disable it by @@ -1283,32 +1341,20 @@ cpu_probe_amdc1e(void) #define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT) static void -cpu_idle_amdc1e(int busy) +cpu_probe_amdc1e(void) { - disable_intr(); - if (sched_runnable()) - enable_intr(); - else { - uint64_t msr; - - msr = rdmsr(MSR_AMDK8_IPM); - if (msr & AMDK8_CMPHALT) - wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); - - if (cpu_idle_hook) - cpu_idle_hook(); - else - __asm __volatile("sti; hlt"); + /* + * Detect the presence of C1E capability mostly on latest + * dual-cores (or future) k8 family. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && + (cpu_id & 0x00000f00) == 0x00000f00 && + (cpu_id & 0x0fff0000) >= 0x00040000) { + cpu_ident_amdc1e = 1; } } -static void -cpu_idle_spin(int busy) -{ - return; -} - #ifdef XEN void (*cpu_idle_fn)(int) = cpu_idle_hlt; #else @@ -1318,79 +1364,51 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi void cpu_idle(int busy) { +#ifndef XEN + uint64_t msr; +#endif + #if defined(SMP) && !defined(XEN) if (mp_grab_cpu_hlt()) return; #endif - cpu_idle_fn(busy); -} - -/* - * mwait cpu power states. Lower 4 bits are sub-states. - */ -#define MWAIT_C0 0xf0 -#define MWAIT_C1 0x00 -#define MWAIT_C2 0x10 -#define MWAIT_C3 0x20 -#define MWAIT_C4 0x30 - -#define MWAIT_DISABLED 0x0 -#define MWAIT_WOKEN 0x1 -#define MWAIT_WAITING 0x2 - -static void -cpu_idle_mwait(int busy) -{ - int *mwait; - - mwait = (int *)PCPU_PTR(monitorbuf); - *mwait = MWAIT_WAITING; - if (sched_runnable()) - return; - cpu_monitor(mwait, 0, 0); - if (*mwait == MWAIT_WAITING) - cpu_mwait(0, MWAIT_C1); -} - -static void -cpu_idle_mwait_hlt(int busy) -{ - int *mwait; +#ifndef XEN + /* If we are busy - try to use fast methods. */ + if (busy) { + if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { + cpu_idle_mwait(busy); + return; + } + } - mwait = (int *)PCPU_PTR(monitorbuf); - if (busy == 0) { - *mwait = MWAIT_DISABLED; - cpu_idle_hlt(busy); - return; + /* Apply AMD APIC timer C1E workaround. */ + if (cpu_ident_amdc1e) { + msr = rdmsr(MSR_AMDK8_IPM); + if (msr & AMDK8_CMPHALT) + wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } - *mwait = MWAIT_WAITING; - if (sched_runnable()) - return; - cpu_monitor(mwait, 0, 0); - if (*mwait == MWAIT_WAITING) - cpu_mwait(0, MWAIT_C1); +#endif + + /* Call main idle method. */ + cpu_idle_fn(busy); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; - int *mwait; + int *state; - if (cpu_idle_fn == cpu_idle_spin) - return (1); - if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt) - return (0); pcpu = pcpu_find(cpu); - mwait = (int *)pcpu->pc_monitorbuf; + state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. 
*/ - if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED) + if (*state == STATE_SLEEPING) return (0); - *mwait = MWAIT_WOKEN; - + if (*state == STATE_MWAIT) + *state = STATE_RUNNING; return (1); } @@ -1403,8 +1421,6 @@ struct { } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, - { cpu_idle_mwait_hlt, "mwait_hlt" }, - { cpu_idle_amdc1e, "amdc1e" }, { cpu_idle_hlt, "hlt" }, { cpu_idle_acpi, "acpi" }, { NULL, NULL } @@ -1423,8 +1439,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; - if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 && - cpu_ident_amdc1e == 0) + if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && + cpu_idle_hook == NULL) continue; p += sprintf(p, "%s, ", idle_tbl[i].id_name); } @@ -1433,6 +1449,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG return (error); } +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, + 0, 0, idle_sysctl_available, "A", "list of available idle functions"); + static int idle_sysctl(SYSCTL_HANDLER_ARGS) { @@ -1456,8 +1475,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS) if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; - if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 && - cpu_ident_amdc1e == 0) + if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && + cpu_idle_hook == NULL) continue; if (strcmp(idle_tbl[i].id_name, buf)) continue; @@ -1467,9 +1486,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS) return (EINVAL); } -SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, - 0, 0, idle_sysctl_available, "A", "list of available idle functions"); - SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); @@ -2723,8 +2739,7 @@ init386(first) thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; #if defined(XEN_PRIVILEGED) - if (cpu_probe_amdc1e()) - cpu_idle_fn = cpu_idle_amdc1e; + cpu_probe_amdc1e(); #endif } @@ -3001,8 +3016,7 @@ init386(first) thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; - if (cpu_probe_amdc1e()) - cpu_idle_fn = cpu_idle_amdc1e; + cpu_probe_amdc1e(); } #endif Modified: stable/8/sys/pc98/pc98/machdep.c ============================================================================== --- stable/8/sys/pc98/pc98/machdep.c Fri Jul 20 17:51:20 2012 (r238657) +++ stable/8/sys/pc98/pc98/machdep.c Fri Jul 20 19:35:20 2012 (r238658) @@ -1122,40 +1122,36 @@ cpu_halt(void) __asm__ ("hlt"); } +static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ +TUNABLE_INT("machdep.idle_mwait", &idle_mwait); +SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait, + 0, "Use MONITOR/MWAIT for short idle"); + +#define STATE_RUNNING 0x0 +#define STATE_MWAIT 0x1 +#define STATE_SLEEPING 0x2 + static void cpu_idle_hlt(int busy) { + int *state; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_SLEEPING; /* - * we must absolutely guarentee that hlt is the next instruction + * We must absolutely guarentee that hlt is the next instruction * after sti or we introduce a timing window. */ disable_intr(); - if (sched_runnable()) + if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); -} - -static void -cpu_idle_spin(int busy) -{ - return; -} - -void (*cpu_idle_fn)(int) = cpu_idle_hlt; - -void -cpu_idle(int busy) -{ -#if defined(SMP) - if (mp_grab_cpu_hlt()) - return; -#endif - cpu_idle_fn(busy); + *state = STATE_RUNNING; } /* - * mwait cpu power states. Lower 4 bits are sub-states. 
+ * MWAIT cpu power states. Lower 4 bits are sub-states. */ #define MWAIT_C0 0xf0 #define MWAIT_C1 0x00 @@ -1163,63 +1159,74 @@ cpu_idle(int busy) #define MWAIT_C3 0x20 #define MWAIT_C4 0x30 -#define MWAIT_DISABLED 0x0 -#define MWAIT_WOKEN 0x1 -#define MWAIT_WAITING 0x2 - static void cpu_idle_mwait(int busy) { - int *mwait; + int *state; - mwait = (int *)PCPU_PTR(monitorbuf); - *mwait = MWAIT_WAITING; - if (sched_runnable()) - return; - cpu_monitor(mwait, 0, 0); - if (*mwait == MWAIT_WAITING) - cpu_mwait(0, MWAIT_C1); + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_MWAIT; + if (!sched_runnable()) { + cpu_monitor(state, 0, 0); + if (*state == STATE_MWAIT) + cpu_mwait(0, MWAIT_C1); + } + *state = STATE_RUNNING; } static void -cpu_idle_mwait_hlt(int busy) +cpu_idle_spin(int busy) +{ + int *state; + int i; + + state = (int *)PCPU_PTR(monitorbuf); + *state = STATE_RUNNING; + for (i = 0; i < 1000; i++) { + if (sched_runnable()) + return; + cpu_spinwait(); + } +} + +void (*cpu_idle_fn)(int) = cpu_idle_hlt; + +void +cpu_idle(int busy) { - int *mwait; - mwait = (int *)PCPU_PTR(monitorbuf); - if (busy == 0) { - *mwait = MWAIT_DISABLED; - cpu_idle_hlt(busy); +#ifdef SMP + if (mp_grab_cpu_hlt()) return; +#endif + /* If we are busy - try to use fast methods. */ + if (busy) { + if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { + cpu_idle_mwait(busy); + return; + } } - *mwait = MWAIT_WAITING; - if (sched_runnable()) - return; - cpu_monitor(mwait, 0, 0); - if (*mwait == MWAIT_WAITING) - cpu_mwait(0, MWAIT_C1); + + /* Call main idle method. */ + cpu_idle_fn(busy); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; - int *mwait; + int *state; - if (cpu_idle_fn == cpu_idle_spin) - return (1); - if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt) - return (0); pcpu = pcpu_find(cpu); - mwait = (int *)pcpu->pc_monitorbuf; + state = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ - if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED) + if (*state == STATE_SLEEPING) return (0); - *mwait = MWAIT_WOKEN; - + if (*state == STATE_MWAIT) + *state = STATE_RUNNING; return (1); } @@ -1232,7 +1239,6 @@ struct { } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, - { cpu_idle_mwait_hlt, "mwait_hlt" }, { cpu_idle_hlt, "hlt" }, { NULL, NULL } }; @@ -1257,6 +1263,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARG return (error); } +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, + 0, 0, idle_sysctl_available, "A", "list of available idle functions"); + static int idle_sysctl(SYSCTL_HANDLER_ARGS) { @@ -1288,9 +1297,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS) return (EINVAL); } -SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, - 0, 0, idle_sysctl_available, "A", "list of available idle functions"); - SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function");
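
For readers skimming the diff, the core of the wakeup-race handling that the
new cpu_idle_hlt() comment describes looks like this, condensed from the
committed amd64 version (the i386 and pc98 copies follow the same pattern;
whitespace and some comments differ here):

static void
cpu_idle_hlt(int busy)
{
        int *state;

        state = (int *)PCPU_PTR(monitorbuf);
        *state = STATE_SLEEPING;
        /*
         * An interrupt taken inside a critical section may leave a
         * preemption pending; re-check for runnable threads with
         * interrupts disabled so that wakeup is not lost.
         */
        disable_intr();
        if (sched_runnable())
                enable_intr();
        else
                /*
                 * "sti; hlt" must be back to back: interrupts are only
                 * recognized after the instruction following sti, so a
                 * wakeup interrupt is acknowledged by hlt itself.
                 */
                __asm __volatile("sti; hlt");
        *state = STATE_RUNNING;
}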
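
The main point of r212541 is that with MONITOR/MWAIT the scheduler can wake an
idle CPU with a plain store instead of an IPI.  The two cooperating pieces,
condensed from the diff (the per-CPU monitorbuf is the monitored word and the
STATE_* values are the ones defined in this change):

/* Idle side: arm MONITOR on this CPU's state word, then MWAIT on it. */
static void
cpu_idle_mwait(int busy)
{
        int *state;

        state = (int *)PCPU_PTR(monitorbuf);
        *state = STATE_MWAIT;

        /* Same interrupt-disabled re-check as cpu_idle_hlt(). */
        disable_intr();
        if (sched_runnable()) {
                enable_intr();
                *state = STATE_RUNNING;
                return;
        }
        cpu_monitor(state, 0, 0);               /* Watch *state for stores. */
        if (*state == STATE_MWAIT)              /* Not woken in the meantime. */
                __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
        else
                enable_intr();
        *state = STATE_RUNNING;
}

/* Wakeup side: a plain store to the monitored word ends the MWAIT. */
int
cpu_idle_wakeup(int cpu)
{
        struct pcpu *pcpu;
        int *state;

        pcpu = pcpu_find(cpu);
        state = (int *)pcpu->pc_monitorbuf;
        if (*state == STATE_SLEEPING)           /* hlt/acpi sleep: IPI needed. */
                return (0);
        if (*state == STATE_MWAIT)
                *state = STATE_RUNNING;         /* Wakes the monitoring CPU. */
        return (1);
}

A zero return tells the caller that an IPI is still required; a non-zero
return means the store alone is enough.  (The pc98 copy of cpu_idle_mwait()
in this MFC keeps the older structure without the interrupt-disabled
re-check.)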
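
The AMD C1E handling also changes shape: the dedicated cpu_idle_amdc1e()
routine is gone, cpu_probe_amdc1e() now only records the capability, and
cpu_idle() clears the CMP-halt bits itself before idling, because C1E would
otherwise stop the local APIC timer.  Condensed from the diff; MSR_AMDK8_IPM
and the CMP-halt bit macros are defined in existing headers, not in this
change:

static void
cpu_probe_amdc1e(void)
{
        /* C1E shows up mostly on later dual-core (or newer) K8 parts. */
        if (cpu_vendor_id == CPU_VENDOR_AMD &&
            (cpu_id & 0x00000f00) == 0x00000f00 &&
            (cpu_id & 0x0fff0000) >= 0x00040000)
                cpu_ident_amdc1e = 1;
}

void
cpu_idle(int busy)
{
        uint64_t msr;

#ifdef SMP
        if (mp_grab_cpu_hlt())
                return;
#endif
        /* Busy hint: prefer MONITOR/MWAIT as the fast method if allowed. */
        if (busy && (cpu_feature2 & CPUID2_MON) && idle_mwait) {
                cpu_idle_mwait(busy);
                return;
        }

        /* C1E would stop the local APIC timer; clear the CMP-halt bits. */
        if (cpu_ident_amdc1e) {
                msr = rdmsr(MSR_AMDK8_IPM);
                if (msr & AMDK8_CMPHALT)
                        wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
        }

        /* Fall back to the administratively selected idle method. */
        cpu_idle_fn(busy);
}

The new machdep.idle_mwait tunable/sysctl disables the busy fast path without
touching the machdep.idle selection.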
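
Finally, cpu_idle_spin() is no longer an unconditional return; it polls
sched_runnable() a bounded number of times so control regularly returns to
the caller.  Condensed from the diff:

static void
cpu_idle_spin(int busy)
{
        int *state;
        int i;

        state = (int *)PCPU_PTR(monitorbuf);
        *state = STATE_RUNNING;
        /*
         * The sched_runnable() check is racy, but the loop re-tests it,
         * so missing it once only delays the pickup by one iteration.
         */
        for (i = 0; i < 1000; i++) {
                if (sched_runnable())
                        return;
                cpu_spinwait();
        }
}

Because the state word stays at STATE_RUNNING, cpu_idle_wakeup() reports
success for a spinning CPU without any store or IPI; the spinning CPU notices
the runnable thread on its own.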