From owner-svn-src-head@FreeBSD.ORG Mon Feb 11 20:36:09 2013 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by hub.freebsd.org (Postfix) with ESMTP id DD827B63; Mon, 11 Feb 2013 20:36:09 +0000 (UTC) (envelope-from neel@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id C177AC3E; Mon, 11 Feb 2013 20:36:09 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id r1BKa906037252; Mon, 11 Feb 2013 20:36:09 GMT (envelope-from neel@svn.freebsd.org) Received: (from neel@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id r1BKa8PW037243; Mon, 11 Feb 2013 20:36:08 GMT (envelope-from neel@svn.freebsd.org) Message-Id: <201302112036.r1BKa8PW037243@svn.freebsd.org> From: Neel Natu Date: Mon, 11 Feb 2013 20:36:08 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r246686 - in head: lib/libvmmapi sys/amd64/include sys/amd64/vmm sys/amd64/vmm/io usr.sbin/bhyve usr.sbin/bhyvectl X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 11 Feb 2013 20:36:09 -0000 Author: neel Date: Mon Feb 11 20:36:07 2013 New Revision: 246686 URL: http://svnweb.freebsd.org/changeset/base/246686 Log: Implement guest vcpu pinning using 'pthread_setaffinity_np(3)'. Prior to this change pinning was implemented via an ioctl (VM_SET_PINNING) that called 'sched_bind()' on behalf of the user thread. The ULE implementation of 'sched_bind()' bumps up 'td_pinned' which in turn runs afoul of the assertion '(td_pinned == 0)' in userret(). Using the cpuset affinity to implement pinning of the vcpu threads works with both 4BSD and ULE schedulers and has the happy side-effect of getting rid of a bunch of code in vmm.ko. Discussed with: grehan Modified: head/lib/libvmmapi/vmmapi.c head/lib/libvmmapi/vmmapi.h head/sys/amd64/include/vmm.h head/sys/amd64/include/vmm_dev.h head/sys/amd64/vmm/io/ppt.c head/sys/amd64/vmm/vmm.c head/sys/amd64/vmm/vmm_dev.c head/usr.sbin/bhyve/bhyverun.c head/usr.sbin/bhyvectl/bhyvectl.c Modified: head/lib/libvmmapi/vmmapi.c ============================================================================== --- head/lib/libvmmapi/vmmapi.c Mon Feb 11 19:23:23 2013 (r246685) +++ head/lib/libvmmapi/vmmapi.c Mon Feb 11 20:36:07 2013 (r246686) @@ -250,34 +250,6 @@ vm_get_register(struct vmctx *ctx, int v } int -vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid) -{ - int error; - struct vm_pin vmpin; - - bzero(&vmpin, sizeof(vmpin)); - vmpin.vm_cpuid = vcpu; - - error = ioctl(ctx->fd, VM_GET_PINNING, &vmpin); - *host_cpuid = vmpin.host_cpuid; - return (error); -} - -int -vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid) -{ - int error; - struct vm_pin vmpin; - - bzero(&vmpin, sizeof(vmpin)); - vmpin.vm_cpuid = vcpu; - vmpin.host_cpuid = host_cpuid; - - error = ioctl(ctx->fd, VM_SET_PINNING, &vmpin); - return (error); -} - -int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit) { int error; Modified: head/lib/libvmmapi/vmmapi.h ============================================================================== --- head/lib/libvmmapi/vmmapi.h Mon Feb 11 19:23:23 2013 (r246685) +++ head/lib/libvmmapi/vmmapi.h Mon Feb 11 20:36:07 2013 (r246686) @@ -56,8 +56,6 @@ int vm_get_desc(struct vmctx *ctx, int v uint64_t *base, uint32_t *limit, uint32_t *access); int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val); int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval); -int vm_get_pinning(struct vmctx *ctx, int vcpu, int *host_cpuid); -int vm_set_pinning(struct vmctx *ctx, int vcpu, int host_cpuid); int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *ret_vmexit); int vm_apicid2vcpu(struct vmctx *ctx, int apicid); Modified: head/sys/amd64/include/vmm.h ============================================================================== --- head/sys/amd64/include/vmm.h Mon Feb 11 19:23:23 2013 (r246685) +++ head/sys/amd64/include/vmm.h Mon Feb 11 20:36:07 2013 (r246686) @@ -102,8 +102,6 @@ int vm_get_seg_desc(struct vm *vm, int v struct seg_desc *ret_desc); int vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc); -int vm_get_pinning(struct vm *vm, int vcpu, int *cpuid); -int vm_set_pinning(struct vm *vm, int vcpu, int cpuid); int vm_run(struct vm *vm, struct vm_run *vmrun); int vm_inject_event(struct vm *vm, int vcpu, int type, int vector, uint32_t error_code, int error_code_valid); Modified: head/sys/amd64/include/vmm_dev.h ============================================================================== --- head/sys/amd64/include/vmm_dev.h Mon Feb 11 19:23:23 2013 (r246685) +++ head/sys/amd64/include/vmm_dev.h Mon Feb 11 20:36:07 2013 (r246686) @@ -51,11 +51,6 @@ struct vm_seg_desc { /* data or code s struct seg_desc desc; }; -struct vm_pin { - int vm_cpuid; - int host_cpuid; /* -1 to unpin */ -}; - struct vm_run { int cpuid; uint64_t rip; /* start running here */ @@ -142,8 +137,6 @@ struct vm_x2apic { enum { IOCNUM_RUN, - IOCNUM_SET_PINNING, - IOCNUM_GET_PINNING, IOCNUM_MAP_MEMORY, IOCNUM_GET_MEMORY_SEG, IOCNUM_SET_REGISTER, @@ -168,10 +161,6 @@ enum { #define VM_RUN \ _IOWR('v', IOCNUM_RUN, struct vm_run) -#define VM_SET_PINNING \ - _IOW('v', IOCNUM_SET_PINNING, struct vm_pin) -#define VM_GET_PINNING \ - _IOWR('v', IOCNUM_GET_PINNING, struct vm_pin) #define VM_MAP_MEMORY \ _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) #define VM_GET_MEMORY_SEG \ Modified: head/sys/amd64/vmm/io/ppt.c ============================================================================== --- head/sys/amd64/vmm/io/ppt.c Mon Feb 11 19:23:23 2013 (r246685) +++ head/sys/amd64/vmm/io/ppt.c Mon Feb 11 20:36:07 2013 (r246686) @@ -402,31 +402,6 @@ pptintr(void *arg) return (FILTER_HANDLED); } -/* - * XXX - * When we try to free the MSI resource the kernel will bind the thread to - * the host cpu was originally handling the MSI. The function freeing the - * MSI vector (apic_free_vector()) will panic the kernel if the thread - * is already bound to a cpu. - * - * So, we temporarily unbind the vcpu thread before freeing the MSI resource. - */ -static void -PPT_TEARDOWN_MSI(struct vm *vm, int vcpu, struct pptdev *ppt) -{ - int pincpu = -1; - - vm_get_pinning(vm, vcpu, &pincpu); - - if (pincpu >= 0) - vm_set_pinning(vm, vcpu, -1); - - ppt_teardown_msi(ppt); - - if (pincpu >= 0) - vm_set_pinning(vm, vcpu, pincpu); -} - int ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, int destcpu, int vector, int numvec) @@ -447,7 +422,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, i return (EBUSY); /* Free any allocated resources */ - PPT_TEARDOWN_MSI(vm, vcpu, ppt); + ppt_teardown_msi(ppt); if (numvec == 0) /* nothing more to do */ return (0); @@ -513,7 +488,7 @@ ppt_setup_msi(struct vm *vm, int vcpu, i } if (i < numvec) { - PPT_TEARDOWN_MSI(vm, vcpu, ppt); + ppt_teardown_msi(ppt); return (ENXIO); } Modified: head/sys/amd64/vmm/vmm.c ============================================================================== --- head/sys/amd64/vmm/vmm.c Mon Feb 11 19:23:23 2013 (r246685) +++ head/sys/amd64/vmm/vmm.c Mon Feb 11 20:36:07 2013 (r246686) @@ -70,7 +70,6 @@ struct vcpu { int flags; enum vcpu_state state; struct mtx mtx; - int pincpu; /* host cpuid this vcpu is bound to */ int hostcpu; /* host cpuid this vcpu last ran on */ uint64_t guest_msrs[VMM_MSR_NUM]; struct vlapic *vlapic; @@ -81,18 +80,6 @@ struct vcpu { enum x2apic_state x2apic_state; int nmi_pending; }; -#define VCPU_F_PINNED 0x0001 - -#define VCPU_PINCPU(vm, vcpuid) \ - ((vm->vcpu[vcpuid].flags & VCPU_F_PINNED) ? vm->vcpu[vcpuid].pincpu : -1) - -#define VCPU_UNPIN(vm, vcpuid) (vm->vcpu[vcpuid].flags &= ~VCPU_F_PINNED) - -#define VCPU_PIN(vm, vcpuid, host_cpuid) \ -do { \ - vm->vcpu[vcpuid].flags |= VCPU_F_PINNED; \ - vm->vcpu[vcpuid].pincpu = host_cpuid; \ -} while(0) #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN) #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx)) @@ -594,52 +581,6 @@ vm_set_seg_desc(struct vm *vm, int vcpu, return (VMSETDESC(vm->cookie, vcpu, reg, desc)); } -int -vm_get_pinning(struct vm *vm, int vcpuid, int *cpuid) -{ - - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) - return (EINVAL); - - *cpuid = VCPU_PINCPU(vm, vcpuid); - - return (0); -} - -int -vm_set_pinning(struct vm *vm, int vcpuid, int host_cpuid) -{ - struct thread *td; - - if (vcpuid < 0 || vcpuid >= VM_MAXCPU) - return (EINVAL); - - td = curthread; /* XXXSMP only safe when muxing vcpus */ - - /* unpin */ - if (host_cpuid < 0) { - VCPU_UNPIN(vm, vcpuid); - thread_lock(td); - sched_unbind(td); - thread_unlock(td); - return (0); - } - - if (CPU_ABSENT(host_cpuid)) - return (EINVAL); - - /* - * XXX we should check that 'host_cpuid' has not already been pinned - * by another vm. - */ - thread_lock(td); - sched_bind(td, host_cpuid); - thread_unlock(td); - VCPU_PIN(vm, vcpuid, host_cpuid); - - return (0); -} - static void restore_guest_fpustate(struct vcpu *vcpu) { Modified: head/sys/amd64/vmm/vmm_dev.c ============================================================================== --- head/sys/amd64/vmm/vmm_dev.c Mon Feb 11 19:23:23 2013 (r246685) +++ head/sys/amd64/vmm/vmm_dev.c Mon Feb 11 20:36:07 2013 (r246686) @@ -144,7 +144,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long c struct vm_memory_segment *seg; struct vm_register *vmreg; struct vm_seg_desc* vmsegdesc; - struct vm_pin *vmpin; struct vm_run *vmrun; struct vm_event *vmevent; struct vm_lapic_irq *vmirq; @@ -170,7 +169,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long c */ switch (cmd) { case VM_RUN: - case VM_SET_PINNING: case VM_GET_REGISTER: case VM_SET_REGISTER: case VM_GET_SEGMENT_DESCRIPTOR: @@ -301,16 +299,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long c vmirq = (struct vm_lapic_irq *)data; error = lapic_set_intr(sc->vm, vmirq->cpuid, vmirq->vector); break; - case VM_SET_PINNING: - vmpin = (struct vm_pin *)data; - error = vm_set_pinning(sc->vm, vmpin->vm_cpuid, - vmpin->host_cpuid); - break; - case VM_GET_PINNING: - vmpin = (struct vm_pin *)data; - error = vm_get_pinning(sc->vm, vmpin->vm_cpuid, - &vmpin->host_cpuid); - break; case VM_MAP_MEMORY: seg = (struct vm_memory_segment *)data; error = vm_malloc(sc->vm, seg->gpa, seg->len); Modified: head/usr.sbin/bhyve/bhyverun.c ============================================================================== --- head/usr.sbin/bhyve/bhyverun.c Mon Feb 11 19:23:23 2013 (r246685) +++ head/usr.sbin/bhyve/bhyverun.c Mon Feb 11 20:36:07 2013 (r246686) @@ -520,13 +520,17 @@ static vmexit_handler_t handler[VM_EXITC static void vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip) { + cpuset_t mask; int error, rc, prevcpu; if (guest_vcpu_mux) setup_timeslice(); if (pincpu >= 0) { - error = vm_set_pinning(ctx, vcpu, pincpu + vcpu); + CPU_ZERO(&mask); + CPU_SET(pincpu + vcpu, &mask); + error = pthread_setaffinity_np(pthread_self(), + sizeof(mask), &mask); assert(error == 0); } Modified: head/usr.sbin/bhyvectl/bhyvectl.c ============================================================================== --- head/usr.sbin/bhyvectl/bhyvectl.c Mon Feb 11 19:23:23 2013 (r246685) +++ head/usr.sbin/bhyvectl/bhyvectl.c Mon Feb 11 20:36:07 2013 (r246686) @@ -183,8 +183,6 @@ usage(void) " [--get-vmcs-exit-interruption-info]\n" " [--get-vmcs-exit-interruption-error]\n" " [--get-vmcs-interruptibility]\n" - " [--set-pinning=]\n" - " [--get-pinning]\n" " [--set-x2apic-state=]\n" " [--get-x2apic-state]\n" " [--set-lowmem=]\n" @@ -218,7 +216,6 @@ static int set_desc_tr, get_desc_tr; static int set_desc_ldtr, get_desc_ldtr; static int set_cs, set_ds, set_es, set_fs, set_gs, set_ss, set_tr, set_ldtr; static int get_cs, get_ds, get_es, get_fs, get_gs, get_ss, get_tr, get_ldtr; -static int set_pinning, get_pinning, pincpu; static int set_x2apic_state, get_x2apic_state; enum x2apic_state x2apic_state; static int run; @@ -374,7 +371,6 @@ enum { SET_SS, SET_TR, SET_LDTR, - SET_PINNING, SET_X2APIC_STATE, SET_VMCS_EXCEPTION_BITMAP, SET_VMCS_ENTRY_INTERRUPTION_INFO, @@ -423,7 +419,6 @@ main(int argc, char *argv[]) { "set-ss", REQ_ARG, 0, SET_SS }, { "set-tr", REQ_ARG, 0, SET_TR }, { "set-ldtr", REQ_ARG, 0, SET_LDTR }, - { "set-pinning",REQ_ARG, 0, SET_PINNING }, { "set-x2apic-state",REQ_ARG, 0, SET_X2APIC_STATE }, { "set-vmcs-exception-bitmap", REQ_ARG, 0, SET_VMCS_EXCEPTION_BITMAP }, @@ -552,7 +547,6 @@ main(int argc, char *argv[]) NO_ARG, &get_vmcs_exit_interruption_error, 1}, { "get-vmcs-interruptibility", NO_ARG, &get_vmcs_interruptibility, 1 }, - { "get-pinning",NO_ARG, &get_pinning, 1 }, { "get-x2apic-state",NO_ARG, &get_x2apic_state, 1 }, { "get-all", NO_ARG, &get_all, 1 }, { "run", NO_ARG, &run, 1 }, @@ -659,10 +653,6 @@ main(int argc, char *argv[]) ldtr = strtoul(optarg, NULL, 0); set_ldtr = 1; break; - case SET_PINNING: - pincpu = strtol(optarg, NULL, 0); - set_pinning = 1; - break; case SET_X2APIC_STATE: x2apic_state = strtol(optarg, NULL, 0); set_x2apic_state = 1; @@ -812,9 +802,6 @@ main(int argc, char *argv[]) if (!error && set_ldtr) error = vm_set_register(ctx, vcpu, VM_REG_GUEST_LDTR, ldtr); - if (!error && set_pinning) - error = vm_set_pinning(ctx, vcpu, pincpu); - if (!error && set_x2apic_state) error = vm_set_x2apic_state(ctx, vcpu, x2apic_state); @@ -1135,16 +1122,6 @@ main(int argc, char *argv[]) printf("ldtr[%d]\t\t0x%04lx\n", vcpu, ldtr); } - if (!error && (get_pinning || get_all)) { - error = vm_get_pinning(ctx, vcpu, &pincpu); - if (error == 0) { - if (pincpu < 0) - printf("pincpu[%d]\tunpinned\n", vcpu); - else - printf("pincpu[%d]\t%d\n", vcpu, pincpu); - } - } - if (!error && (get_x2apic_state || get_all)) { error = vm_get_x2apic_state(ctx, vcpu, &x2apic_state); if (error == 0)