Date: Fri, 13 May 2011 12:39:38 +0000 (UTC) From: Alexander Motin <mav@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r221835 - in head/sys: i386/i386 i386/xen kern Message-ID: <201105131239.p4DCdccA071147@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mav Date: Fri May 13 12:39:37 2011 New Revision: 221835 URL: http://svn.freebsd.org/changeset/base/221835 Log: Refactor Xen PV code to use new event timers subsystem. That uses one-shot Xen timer and time counter to provide one-shot and periodic time events. In my tests this reduces idle interrupts rate down to about 30Hz, and according to Xen VM Manager reduces host CPU load by three times compared to the previous periodic 100Hz clock. Also now, when needed, it is possible to increase HZ rate without useless CPU burning during idle periods. Now only ia64 and some ARMs are left not migrated to the new event timers. Modified: head/sys/i386/i386/machdep.c head/sys/i386/xen/clock.c head/sys/i386/xen/mp_machdep.c head/sys/kern/kern_clocksource.c Modified: head/sys/i386/i386/machdep.c ============================================================================== --- head/sys/i386/i386/machdep.c Fri May 13 10:36:38 2011 (r221834) +++ head/sys/i386/i386/machdep.c Fri May 13 12:39:37 2011 (r221835) @@ -1351,7 +1351,9 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi void cpu_idle(int busy) { +#ifndef XEN uint64_t msr; +#endif CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); @@ -1367,34 +1369,32 @@ cpu_idle(int busy) goto out; } } +#endif /* If we have time - switch timers into idle mode. */ if (!busy) { critical_enter(); cpu_idleclock(); } -#endif - /* Apply AMD APIC timer C1E workaround. */ - if (cpu_ident_amdc1e #ifndef XEN - && cpu_disable_deep_sleep -#endif - ) { + /* Apply AMD APIC timer C1E workaround. */ + if (cpu_ident_amdc1e && cpu_disable_deep_sleep) { msr = rdmsr(MSR_AMDK8_IPM); if (msr & AMDK8_CMPHALT) wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT); } +#endif /* Call main idle method. */ cpu_idle_fn(busy); -#ifndef XEN /* Switch timers mack into active mode. 
*/ if (!busy) { cpu_activeclock(); critical_exit(); } +#ifndef XEN out: #endif CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", Modified: head/sys/i386/xen/clock.c ============================================================================== --- head/sys/i386/xen/clock.c Fri May 13 10:36:38 2011 (r221834) +++ head/sys/i386/xen/clock.c Fri May 13 12:39:37 2011 (r221835) @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/proc.h> #include <sys/time.h> +#include <sys/timeet.h> #include <sys/timetc.h> #include <sys/kernel.h> #include <sys/limits.h> @@ -301,38 +302,44 @@ static struct timecounter xen_timecounte 0 /* quality */ }; +static struct eventtimer xen_et; + +struct xen_et_state { + int mode; +#define MODE_STOP 0 +#define MODE_PERIODIC 1 +#define MODE_ONESHOT 2 + int64_t period; + int64_t next; +}; + +static DPCPU_DEFINE(struct xen_et_state, et_state); + static int clkintr(void *arg) { - int64_t delta_cpu, delta; - struct trapframe *frame = (struct trapframe *)arg; + int64_t now; int cpu = smp_processor_id(); struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); + struct xen_et_state *state = DPCPU_PTR(et_state); do { __get_time_values_from_xen(); - - delta = delta_cpu = - shadow->system_timestamp + get_nsec_offset(shadow); - delta -= processed_system_time; - delta_cpu -= per_cpu(processed_system_time, cpu); - + now = shadow->system_timestamp + get_nsec_offset(shadow); } while (!time_values_up_to_date(cpu)); - - if (unlikely(delta < (int64_t)0) || unlikely(delta_cpu < (int64_t)0)) { - printf("Timer ISR: Time went backwards: %lld\n", delta); - return (FILTER_HANDLED); - } - + /* Process elapsed ticks since last call. 
*/ - while (delta >= NS_PER_TICK) { - delta -= NS_PER_TICK; - processed_system_time += NS_PER_TICK; - per_cpu(processed_system_time, cpu) += NS_PER_TICK; - if (PCPU_GET(cpuid) == 0) - hardclock(TRAPF_USERMODE(frame), TRAPF_PC(frame)); - else - hardclock_cpu(TRAPF_USERMODE(frame)); + processed_system_time = now; + if (state->mode == MODE_PERIODIC) { + while (now >= state->next) { + state->next += state->period; + if (xen_et.et_active) + xen_et.et_event_cb(&xen_et, xen_et.et_arg); + } + HYPERVISOR_set_timer_op(state->next + 50000); + } else if (state->mode == MODE_ONESHOT) { + if (xen_et.et_active) + xen_et.et_event_cb(&xen_et, xen_et.et_arg); } /* * Take synchronised time from Xen once a minute if we're not @@ -484,12 +491,14 @@ DELAY(int n) void timer_restore(void) { + struct xen_et_state *state = DPCPU_PTR(et_state); + /* Get timebases for new environment. */ __get_time_values_from_xen(); /* Reset our own concept of passage of system time. */ processed_system_time = per_cpu(shadow_time, 0).system_timestamp; - per_cpu(processed_system_time, 0) = processed_system_time; + state->next = processed_system_time; } void @@ -503,7 +512,6 @@ startrtclock() /* initialize xen values */ __get_time_values_from_xen(); processed_system_time = per_cpu(shadow_time, 0).system_timestamp; - per_cpu(processed_system_time, 0) = processed_system_time; __cpu_khz = 1000000ULL << 32; info = &HYPERVISOR_shared_info->vcpu_info[0].time; @@ -759,7 +767,49 @@ resettodr() } #endif -static struct vcpu_set_periodic_timer xen_set_periodic_tick; +static int +xen_et_start(struct eventtimer *et, + struct bintime *first, struct bintime *period) +{ + struct xen_et_state *state = DPCPU_PTR(et_state); + struct shadow_time_info *shadow; + int64_t fperiod; + + __get_time_values_from_xen(); + + if (period != NULL) { + state->mode = MODE_PERIODIC; + state->period = (1000000000LL * + (uint32_t)(period->frac >> 32)) >> 32; + if (period->sec != 0) + state->period += 1000000000LL * period->sec; + } else { + 
state->mode = MODE_ONESHOT; + state->period = 0; + } + if (first != NULL) { + fperiod = (1000000000LL * (uint32_t)(first->frac >> 32)) >> 32; + if (first->sec != 0) + fperiod += 1000000000LL * first->sec; + } else + fperiod = state->period; + + shadow = &per_cpu(shadow_time, smp_processor_id()); + state->next = shadow->system_timestamp + get_nsec_offset(shadow); + state->next += fperiod; + HYPERVISOR_set_timer_op(state->next + 50000); + return (0); +} + +static int +xen_et_stop(struct eventtimer *et) +{ + struct xen_et_state *state = DPCPU_PTR(et_state); + + state->mode = MODE_STOP; + HYPERVISOR_set_timer_op(0); + return (0); +} /* * Start clocks running. @@ -770,56 +820,48 @@ cpu_initclocks(void) unsigned int time_irq; int error; - xen_set_periodic_tick.period_ns = NS_PER_TICK; - - HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0, - &xen_set_periodic_tick); - - error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk", - clkintr, NULL, NULL, - INTR_TYPE_CLK, &time_irq); + HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 0, NULL); + error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "cpu0:timer", + clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq); if (error) panic("failed to register clock interrupt\n"); /* should fast clock be enabled ? 
*/ - + + bzero(&xen_et, sizeof(xen_et)); + xen_et.et_name = "ixen"; + xen_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT | + ET_FLAGS_PERCPU; + xen_et.et_quality = 600; + xen_et.et_frequency = 0; + xen_et.et_min_period.sec = 0; + xen_et.et_min_period.frac = 0x00400000LL << 32; + xen_et.et_max_period.sec = 2; + xen_et.et_max_period.frac = 0; + xen_et.et_start = xen_et_start; + xen_et.et_stop = xen_et_stop; + xen_et.et_priv = NULL; + et_register(&xen_et); + + cpu_initclocks_bsp(); } int ap_cpu_initclocks(int cpu) { + char buf[MAXCOMLEN + 1]; unsigned int time_irq; int error; - xen_set_periodic_tick.period_ns = NS_PER_TICK; - - HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu, - &xen_set_periodic_tick); - error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk", - clkintr, NULL, NULL, - INTR_TYPE_CLK, &time_irq); + HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL); + snprintf(buf, sizeof(buf), "cpu%d:timer", cpu); + error = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, buf, + clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq); if (error) panic("failed to register clock interrupt\n"); - return (0); } - -void -cpu_startprofclock(void) -{ - - printf("cpu_startprofclock: profiling clock is not supported\n"); -} - -void -cpu_stopprofclock(void) -{ - - printf("cpu_stopprofclock: profiling clock is not supported\n"); -} -#define NSEC_PER_USEC 1000 - static uint32_t xen_get_timecount(struct timecounter *tc) { @@ -842,45 +884,11 @@ get_system_time(int ticks) return processed_system_time + (ticks * NS_PER_TICK); } -/* - * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c - */ - - -/* Convert jiffies to system time. */ -static uint64_t -ticks_to_system_time(int newticks) -{ - int delta; - uint64_t st; - - delta = newticks - ticks; - if (delta < 1) { - /* Triggers in some wrap-around cases, - * but that's okay: - * we just end up with a shorter timeout. 
*/ - st = processed_system_time + NS_PER_TICK; - } else if (((unsigned int)delta >> (BITS_PER_LONG-3)) != 0) { - /* Very long timeout means there is no pending timer. - * We indicate this to Xen by passing zero timeout. */ - st = 0; - } else { - st = processed_system_time + delta * (uint64_t)NS_PER_TICK; - } - - return (st); -} - void idle_block(void) { - uint64_t timeout; - - timeout = ticks_to_system_time(ticks + 1) + NS_PER_TICK/2; - __get_time_values_from_xen(); - PANIC_IF(HYPERVISOR_set_timer_op(timeout) != 0); - HYPERVISOR_sched_op(SCHEDOP_block, 0); + HYPERVISOR_sched_op(SCHEDOP_block, 0); } int @@ -903,6 +911,3 @@ timer_spkr_setfreq(int freq) } - - - Modified: head/sys/i386/xen/mp_machdep.c ============================================================================== --- head/sys/i386/xen/mp_machdep.c Fri May 13 10:36:38 2011 (r221834) +++ head/sys/i386/xen/mp_machdep.c Fri May 13 12:39:37 2011 (r221835) @@ -628,8 +628,11 @@ init_secondary(void) while (smp_started == 0) ia32_pause(); - PCPU_SET(curthread, PCPU_GET(idlethread)); + + /* Start per-CPU event timers. */ + cpu_initclocks_ap(); + /* enter the scheduler */ sched_throw(NULL); Modified: head/sys/kern/kern_clocksource.c ============================================================================== --- head/sys/kern/kern_clocksource.c Fri May 13 10:36:38 2011 (r221834) +++ head/sys/kern/kern_clocksource.c Fri May 13 12:39:37 2011 (r221835) @@ -31,9 +31,6 @@ __FBSDID("$FreeBSD$"); * Common routines to manage event timers hardware. */ -/* XEN has own timer routines now. */ -#ifndef XEN - #include "opt_device_polling.h" #include "opt_kdtrace.h" @@ -899,5 +896,3 @@ sysctl_kern_eventtimer_periodic(SYSCTL_H SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode"); - -#endif
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201105131239.p4DCdccA071147>