From owner-svn-src-head@FreeBSD.ORG Sat Dec 7 23:11:13 2013 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id CF4D0FE; Sat, 7 Dec 2013 23:11:13 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id BB0E410CB; Sat, 7 Dec 2013 23:11:13 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id rB7NBDTP072044; Sat, 7 Dec 2013 23:11:13 GMT (envelope-from neel@svn.freebsd.org) Received: (from neel@localhost) by svn.freebsd.org (8.14.7/8.14.7/Submit) id rB7NBCO1072034; Sat, 7 Dec 2013 23:11:12 GMT (envelope-from neel@svn.freebsd.org) Message-Id: <201312072311.rB7NBCO1072034@svn.freebsd.org> From: Neel Natu Date: Sat, 7 Dec 2013 23:11:12 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r259085 - in head/sys/amd64/vmm: . intel io X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.17 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 07 Dec 2013 23:11:14 -0000 Author: neel Date: Sat Dec 7 23:11:12 2013 New Revision: 259085 URL: http://svnweb.freebsd.org/changeset/base/259085 Log: Use callout(9) to drive the vlapic timer instead of clocking it on each VM exit. This decouples the guest's 'hz' from the host's 'hz' setting. For example, it is now possible to have a guest run at 'hz=1000' while the host is at 'hz=100'. 
Discussed with: grehan@ Tested by: Tycho Nightingale (tycho.nightingale@pluribusnetworks.com) Modified: head/sys/amd64/vmm/intel/vmx.c head/sys/amd64/vmm/io/vlapic.c head/sys/amd64/vmm/io/vlapic.h head/sys/amd64/vmm/vmm.c head/sys/amd64/vmm/vmm_lapic.c head/sys/amd64/vmm/vmm_lapic.h Modified: head/sys/amd64/vmm/intel/vmx.c ============================================================================== --- head/sys/amd64/vmm/intel/vmx.c Sat Dec 7 23:05:59 2013 (r259084) +++ head/sys/amd64/vmm/intel/vmx.c Sat Dec 7 23:11:12 2013 (r259085) @@ -1563,7 +1563,6 @@ vmx_run(void *arg, int vcpu, register_t panic("vmx_run: error %d setting up pcpu defaults", error); do { - lapic_timer_tick(vmx->vm, vcpu); vmx_inject_interrupts(vmx, vcpu); vmx_run_trace(vmx, vcpu); rc = vmx_setjmp(vmxctx); Modified: head/sys/amd64/vmm/io/vlapic.c ============================================================================== --- head/sys/amd64/vmm/io/vlapic.c Sat Dec 7 23:05:59 2013 (r259084) +++ head/sys/amd64/vmm/io/vlapic.c Sat Dec 7 23:11:12 2013 (r259085) @@ -30,8 +30,10 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include +#include #include #include @@ -103,12 +105,15 @@ struct vlapic { struct vm *vm; int vcpuid; - struct LAPIC apic; + struct LAPIC apic; int esr_update; - int divisor; - int ccr_ticks; + struct callout callout; /* vlapic timer */ + struct bintime timer_fire_bt; /* callout expiry time */ + struct bintime timer_freq_bt; /* timer frequency */ + struct bintime timer_period_bt; /* timer period */ + struct mtx timer_mtx; /* * The 'isrvec_stk' is a stack of vectors injected by the local apic. 
@@ -123,6 +128,21 @@ struct vlapic { enum boot_state boot_state; }; +/* + * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the + * vlapic_callout_handler() and vcpu accesses to the following registers: + * - initial count register aka icr_timer + * - current count register aka ccr_timer + * - divide config register aka dcr_timer + * - timer LVT register + * + * Note that the vlapic_callout_handler() does not write to any of these + * registers so they can be safely read from the vcpu context without locking. + */ +#define VLAPIC_TIMER_LOCK(vlapic) mtx_lock(&((vlapic)->timer_mtx)) +#define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock(&((vlapic)->timer_mtx)) +#define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx)) + #define VLAPIC_BUS_FREQ tsc_freq static int @@ -170,11 +190,62 @@ vlapic_dump_lvt(uint32_t offset, uint32_ } #endif -static uint64_t +static uint32_t vlapic_get_ccr(struct vlapic *vlapic) { - struct LAPIC *lapic = &vlapic->apic; - return lapic->ccr_timer; + struct bintime bt_now, bt_rem; + struct LAPIC *lapic; + uint32_t ccr; + + ccr = 0; + lapic = &vlapic->apic; + + VLAPIC_TIMER_LOCK(vlapic); + if (callout_active(&vlapic->callout)) { + /* + * If the timer is scheduled to expire in the future then + * compute the value of 'ccr' based on the remaining time. 
+ */ + binuptime(&bt_now); + if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) { + bt_rem = vlapic->timer_fire_bt; + bintime_sub(&bt_rem, &bt_now); + ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt); + ccr += bt_rem.frac / vlapic->timer_freq_bt.frac; + } + } + KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, " + "icr_timer is %#x", ccr, lapic->icr_timer)); + VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x", + ccr, lapic->icr_timer); + VLAPIC_TIMER_UNLOCK(vlapic); + return (ccr); +} + +static void +vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr) +{ + struct LAPIC *lapic; + int divisor; + + lapic = &vlapic->apic; + VLAPIC_TIMER_LOCK(vlapic); + + lapic->dcr_timer = dcr; + divisor = vlapic_timer_divisor(dcr); + VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", dcr, divisor); + + /* + * Update the timer frequency and the timer period. + * + * XXX changes to the frequency divider will not take effect until + * the timer is reloaded. + */ + FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt); + vlapic->timer_period_bt = vlapic->timer_freq_bt; + bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer); + + VLAPIC_TIMER_UNLOCK(vlapic); } static void @@ -203,7 +274,7 @@ vlapic_reset(struct vlapic *vlapic) memset(lapic, 0, sizeof(*lapic)); lapic->apr = vlapic->vcpuid; vlapic_init_ipi(vlapic); - vlapic->divisor = vlapic_timer_divisor(lapic->dcr_timer); + vlapic_set_dcr(vlapic, 0); if (vlapic->vcpuid == 0) vlapic->boot_state = BS_RUNNING; /* BSP */ @@ -250,30 +321,8 @@ vlapic_set_intr_ready(struct vlapic *vla VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); } -static void -vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed) -{ - uint32_t icr_timer; - - icr_timer = vlapic->apic.icr_timer; - - vlapic->ccr_ticks = ticks; - if (elapsed < icr_timer) - vlapic->apic.ccr_timer = icr_timer - elapsed; - else { - /* - * This can happen when the guest is trying to run its local - * apic timer higher that the setting of 'hz' in 
the host. - * - * We deal with this by running the guest local apic timer - * at the rate of the host's 'hz' setting. - */ - vlapic->apic.ccr_timer = 0; - } -} - static __inline uint32_t * -vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) +vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) { struct LAPIC *lapic = &vlapic->apic; int i; @@ -285,6 +334,33 @@ vlapic_get_lvt(struct vlapic *vlapic, ui return ((&lapic->lvt_timer) + i);; } +static __inline uint32_t +vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) +{ + + return (*vlapic_get_lvtptr(vlapic, offset)); +} + +static void +vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val) +{ + uint32_t *lvtptr; + struct LAPIC *lapic; + + lapic = &vlapic->apic; + lvtptr = vlapic_get_lvtptr(vlapic, offset); + + if (offset == APIC_OFFSET_TIMER_LVT) + VLAPIC_TIMER_LOCK(vlapic); + + if (!(lapic->svr & APIC_SVR_ENABLE)) + val |= APIC_LVT_M; + *lvtptr = val; + + if (offset == APIC_OFFSET_TIMER_LVT) + VLAPIC_TIMER_UNLOCK(vlapic); +} + #if 1 static void dump_isrvec_stk(struct vlapic *vlapic) @@ -407,15 +483,16 @@ vlapic_process_eoi(struct vlapic *vlapic } static __inline int -vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask) +vlapic_get_lvt_field(uint32_t lvt, uint32_t mask) { - return (*lvt & mask); + + return (lvt & mask); } static __inline int vlapic_periodic_timer(struct vlapic *vlapic) { - uint32_t *lvt; + uint32_t lvt; lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); @@ -428,15 +505,109 @@ static void vlapic_fire_timer(struct vlapic *vlapic) { int vector; - uint32_t *lvt; + uint32_t lvt; + + KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked")); lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT); if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) { vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1); - vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR); + vector = vlapic_get_lvt_field(lvt, APIC_LVTT_VECTOR); vlapic_set_intr_ready(vlapic, vector, false); + 
vcpu_notify_event(vlapic->vm, vlapic->vcpuid); + } +} + +static void +vlapic_callout_handler(void *arg) +{ + struct vlapic *vlapic; + struct bintime bt, btnow; + sbintime_t rem_sbt; + + vlapic = arg; + + VLAPIC_TIMER_LOCK(vlapic); + if (callout_pending(&vlapic->callout)) /* callout was reset */ + goto done; + + if (!callout_active(&vlapic->callout)) /* callout was stopped */ + goto done; + + callout_deactivate(&vlapic->callout); + + KASSERT(vlapic->apic.icr_timer != 0, ("vlapic timer is disabled")); + + vlapic_fire_timer(vlapic); + + if (vlapic_periodic_timer(vlapic)) { + binuptime(&btnow); + KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=), + ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx", + btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec, + vlapic->timer_fire_bt.frac)); + + /* + * Compute the delta between when the timer was supposed to + * fire and the present time. + */ + bt = btnow; + bintime_sub(&bt, &vlapic->timer_fire_bt); + + rem_sbt = bttosbt(vlapic->timer_period_bt); + if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) { + /* + * Adjust the time until the next countdown downward + * to account for the lost time. + */ + rem_sbt -= bttosbt(bt); + } else { + /* + * If the delta is greater than the timer period then + * just reset our time base instead of trying to catch + * up. 
+ */ + vlapic->timer_fire_bt = btnow; + VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu " + "usecs, period is %lu usecs - resetting time base", + bttosbt(bt) / SBT_1US, + bttosbt(vlapic->timer_period_bt) / SBT_1US); + } + + bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); + callout_reset_sbt(&vlapic->callout, rem_sbt, 0, + vlapic_callout_handler, vlapic, 0); } +done: + VLAPIC_TIMER_UNLOCK(vlapic); +} + +static void +vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer) +{ + struct LAPIC *lapic; + sbintime_t sbt; + + VLAPIC_TIMER_LOCK(vlapic); + + lapic = &vlapic->apic; + lapic->icr_timer = icr_timer; + + vlapic->timer_period_bt = vlapic->timer_freq_bt; + bintime_mul(&vlapic->timer_period_bt, icr_timer); + + if (icr_timer != 0) { + binuptime(&vlapic->timer_fire_bt); + bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt); + + sbt = bttosbt(vlapic->timer_period_bt); + callout_reset_sbt(&vlapic->callout, sbt, 0, + vlapic_callout_handler, vlapic, 0); + } else + callout_stop(&vlapic->callout); + + VLAPIC_TIMER_UNLOCK(vlapic); } static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); @@ -564,7 +735,6 @@ vlapic_pending_intr(struct vlapic *vlapi break; } } - VLAPIC_CTR0(vlapic, "no pending intr"); return (-1); } @@ -613,9 +783,21 @@ lapic_set_svr(struct vlapic *vlapic, uin changed = old ^ new; if ((changed & APIC_SVR_ENABLE) != 0) { if ((new & APIC_SVR_ENABLE) == 0) { + /* + * The apic is now disabled so stop the apic timer. + */ VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); + VLAPIC_TIMER_LOCK(vlapic); + callout_stop(&vlapic->callout); + VLAPIC_TIMER_UNLOCK(vlapic); } else { + /* + * The apic is now enabled so restart the apic timer + * if it is configured in periodic mode. 
+ */ VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); + if (vlapic_periodic_timer(vlapic)) + vlapic_set_icr_timer(vlapic, lapic->icr_timer); } } lapic->svr = new; @@ -691,8 +873,7 @@ vlapic_read(struct vlapic *vlapic, uint6 *data = lapic->icr_hi; break; case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: - reg = vlapic_get_lvt(vlapic, offset); - *data = *(reg); + *data = vlapic_get_lvt(vlapic, offset); break; case APIC_OFFSET_ICR: *data = lapic->icr_timer; @@ -717,7 +898,6 @@ int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data) { struct LAPIC *lapic = &vlapic->apic; - uint32_t *reg; int retval; VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data); @@ -760,21 +940,14 @@ vlapic_write(struct vlapic *vlapic, uint } break; case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: - reg = vlapic_get_lvt(vlapic, offset); - if (!(lapic->svr & APIC_SVR_ENABLE)) { - data |= APIC_LVT_M; - } - *reg = data; - // vlapic_dump_lvt(offset, reg); + vlapic_set_lvt(vlapic, offset, data); break; case APIC_OFFSET_ICR: - lapic->icr_timer = data; - vlapic_start_timer(vlapic, 0); + vlapic_set_icr_timer(vlapic, data); break; case APIC_OFFSET_DCR: - lapic->dcr_timer = data; - vlapic->divisor = vlapic_timer_divisor(data); + vlapic_set_dcr(vlapic, data); break; case APIC_OFFSET_ESR: @@ -796,70 +969,6 @@ vlapic_write(struct vlapic *vlapic, uint return (retval); } -int -vlapic_timer_tick(struct vlapic *vlapic) -{ - int curticks, delta, periodic, fired; - uint32_t ccr; - uint32_t decrement, leftover; - -restart: - curticks = ticks; - delta = curticks - vlapic->ccr_ticks; - - /* Local APIC timer is disabled */ - if (vlapic->apic.icr_timer == 0) - return (-1); - - /* One-shot mode and timer has already counted down to zero */ - periodic = vlapic_periodic_timer(vlapic); - if (!periodic && vlapic->apic.ccr_timer == 0) - return (-1); - /* - * The 'curticks' and 'ccr_ticks' are out of sync by more than - * 2^31 ticks. We deal with this by restarting the timer. 
- */ - if (delta < 0) { - vlapic_start_timer(vlapic, 0); - goto restart; - } - - fired = 0; - decrement = (VLAPIC_BUS_FREQ / vlapic->divisor) / hz; - - vlapic->ccr_ticks = curticks; - ccr = vlapic->apic.ccr_timer; - - while (delta-- > 0) { - if (ccr > decrement) { - ccr -= decrement; - continue; - } - - /* Trigger the local apic timer interrupt */ - vlapic_fire_timer(vlapic); - if (periodic) { - leftover = decrement - ccr; - vlapic_start_timer(vlapic, leftover); - ccr = vlapic->apic.ccr_timer; - } else { - /* - * One-shot timer has counted down to zero. - */ - ccr = 0; - } - fired = 1; - break; - } - - vlapic->apic.ccr_timer = ccr; - - if (!fired) - return ((ccr / decrement) + 1); - else - return (0); -} - struct vlapic * vlapic_init(struct vm *vm, int vcpuid) { @@ -869,6 +978,9 @@ vlapic_init(struct vm *vm, int vcpuid) vlapic->vm = vm; vlapic->vcpuid = vcpuid; + mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_DEF); + callout_init(&vlapic->callout, 1); + vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; if (vcpuid == 0) @@ -883,6 +995,7 @@ void vlapic_cleanup(struct vlapic *vlapic) { + callout_drain(&vlapic->callout); free(vlapic, M_VLAPIC); } Modified: head/sys/amd64/vmm/io/vlapic.h ============================================================================== --- head/sys/amd64/vmm/io/vlapic.h Sat Dec 7 23:05:59 2013 (r259084) +++ head/sys/amd64/vmm/io/vlapic.h Sat Dec 7 23:11:12 2013 (r259085) @@ -95,7 +95,6 @@ int vlapic_read(struct vlapic *vlapic, u int vlapic_pending_intr(struct vlapic *vlapic); void vlapic_intr_accepted(struct vlapic *vlapic, int vector); void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level); -int vlapic_timer_tick(struct vlapic *vlapic); uint64_t vlapic_get_apicbase(struct vlapic *vlapic); void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val); Modified: head/sys/amd64/vmm/vmm.c ============================================================================== --- head/sys/amd64/vmm/vmm.c Sat Dec 
7 23:05:59 2013 (r259084) +++ head/sys/amd64/vmm/vmm.c Sat Dec 7 23:11:12 2013 (r259085) @@ -865,26 +865,13 @@ vm_handle_hlt(struct vm *vm, int vcpuid, { struct vm_exit *vmexit; struct vcpu *vcpu; - int sleepticks, t; + int t, timo; vcpu = &vm->vcpu[vcpuid]; vcpu_lock(vcpu); /* - * Figure out the number of host ticks until the next apic - * timer interrupt in the guest. - */ - sleepticks = lapic_timer_tick(vm, vcpuid); - - /* - * If the guest local apic timer is disabled then sleep for - * a long time but not forever. - */ - if (sleepticks < 0) - sleepticks = hz; - - /* * Do a final check for pending NMI or interrupts before * really putting this thread to sleep. * @@ -893,12 +880,15 @@ vm_handle_hlt(struct vm *vm, int vcpuid, */ if (!vm_nmi_pending(vm, vcpuid) && (intr_disabled || vlapic_pending_intr(vcpu->vlapic) < 0)) { - if (sleepticks <= 0) - panic("invalid sleepticks %d", sleepticks); t = ticks; vcpu_require_state_locked(vcpu, VCPU_SLEEPING); if (vlapic_enabled(vcpu->vlapic)) { - msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks); + /* + * XXX msleep_spin() is not interruptible so use the + * 'timo' to put an upper bound on the sleep time. 
+ */ + timo = hz; + msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo); } else { /* * Spindown the vcpu if the apic is disabled and it Modified: head/sys/amd64/vmm/vmm_lapic.c ============================================================================== --- head/sys/amd64/vmm/vmm_lapic.c Sat Dec 7 23:05:59 2013 (r259084) +++ head/sys/amd64/vmm/vmm_lapic.c Sat Dec 7 23:11:12 2013 (r259085) @@ -80,16 +80,6 @@ lapic_set_intr(struct vm *vm, int cpu, i return (0); } -int -lapic_timer_tick(struct vm *vm, int cpu) -{ - struct vlapic *vlapic; - - vlapic = vm_lapic(vm, cpu); - - return (vlapic_timer_tick(vlapic)); -} - static boolean_t x2apic_msr(u_int msr) { Modified: head/sys/amd64/vmm/vmm_lapic.h ============================================================================== --- head/sys/amd64/vmm/vmm_lapic.h Sat Dec 7 23:05:59 2013 (r259084) +++ head/sys/amd64/vmm/vmm_lapic.h Sat Dec 7 23:11:12 2013 (r259085) @@ -40,8 +40,6 @@ int lapic_mmio_read(void *vm, int cpu, u int lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size, void *arg); -int lapic_timer_tick(struct vm *vm, int cpu); - /* * Returns a vector between 32 and 255 if an interrupt is pending in the * IRR that can be delivered based on the current state of ISR and TPR.