Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 6 Sep 2013 22:17:02 +0000 (UTC)
From:      "Justin T. Gibbs" <gibbs@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r255331 - in head/sys: amd64/amd64 amd64/include i386/i386 i386/include i386/xen x86/xen xen
Message-ID:  <201309062217.r86MH2M8028267@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gibbs
Date: Fri Sep  6 22:17:02 2013
New Revision: 255331
URL: http://svnweb.freebsd.org/changeset/base/255331

Log:
  Implement PV IPIs for PVHVM guests and further converge PV and HVM
  IPI implementations.
  
  Submitted by: Roger Pau Monné
  Sponsored by: Citrix Systems R&D
  Submitted by: gibbs (misc cleanup, table driven config)
  Reviewed by:  gibbs
  MFC after: 2 weeks
  
  sys/amd64/include/cpufunc.h:
  sys/amd64/amd64/pmap.c:
  	Move invltlb_globpcid() into cpufunc.h so that it can be
  	used by the Xen HVM version of tlb shootdown IPI handlers.
  
  sys/x86/xen/xen_intr.c:
  sys/xen/xen_intr.h:
  	Rename xen_intr_bind_ipi() to xen_intr_alloc_and_bind_ipi(),
  	and remove the ipi vector parameter.  This api allocates
  	an event channel port that can be used for ipi services,
  	but knows nothing of the actual ipi for which that port
  	will be used.  Removing the unused argument and cleaning
  	up the comments surrounding its declaration helps clarify
  	its actual role.
  
  sys/amd64/amd64/mp_machdep.c:
  sys/amd64/include/cpu.h:
  sys/i386/i386/mp_machdep.c:
  sys/i386/include/cpu.h:
  	Implement a generic framework for amd64 and i386 that allows
  	the implementation of certain CPU management functions to
  	be selected at runtime.  Currently this is only used for
  	the ipi send function, which we optimize for Xen when running
  	on a Xen hypervisor, but can easily be expanded to support
  	more operations.
  
  sys/x86/xen/hvm.c:
  	Implement Xen PV IPI handlers and operations, replacing native
  	send IPI.
  
  sys/amd64/include/pcpu.h:
  sys/i386/include/pcpu.h:
  sys/i386/include/smp.h:
  	Remove NR_VIRQS and NR_IPIS from FreeBSD headers.  NR_VIRQS
  	is defined already for us in the xen interface files.
  	NR_IPIS is only needed in one file per Xen platform and is
  	easily inferred from the IPI vector table that is defined in
  	those files.
  
  sys/i386/xen/mp_machdep.c:
  	Restructure to more closely match the HVM implementation by
  	performing table driven IPI setup.

Modified:
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/cpu.h
  head/sys/amd64/include/cpufunc.h
  head/sys/amd64/include/pcpu.h
  head/sys/i386/i386/mp_machdep.c
  head/sys/i386/include/cpu.h
  head/sys/i386/include/pcpu.h
  head/sys/i386/include/smp.h
  head/sys/i386/xen/mp_machdep.c
  head/sys/x86/xen/hvm.c
  head/sys/x86/xen/xen_intr.c
  head/sys/xen/xen_intr.h

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/amd64/amd64/mp_machdep.c	Fri Sep  6 22:17:02 2013	(r255331)
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/smp.h>
 #include <machine/specialreg.h>
 #include <machine/tss.h>
+#include <machine/cpu.h>
 
 #ifdef XENHVM
 #include <xen/hvm.h>
@@ -125,6 +126,11 @@ u_long *ipi_rendezvous_counts[MAXCPU];
 static u_long *ipi_hardclock_counts[MAXCPU];
 #endif
 
+/* Default cpu_ops implementation. */
+struct cpu_ops cpu_ops = {
+	.ipi_vectored = lapic_ipi_vectored
+};
+
 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
 
 extern int pmap_pcid_enabled;
@@ -1125,7 +1131,7 @@ ipi_send_cpu(int cpu, u_int ipi)
 		if (old_pending)
 			return;
 	}
-	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+	cpu_ops.ipi_vectored(ipi, cpu_apic_ids[cpu]);
 }
 
 /*
@@ -1395,7 +1401,7 @@ ipi_all_but_self(u_int ipi)
 		CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
-	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+	cpu_ops.ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 }
 
 int

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/amd64/amd64/pmap.c	Fri Sep  6 22:17:02 2013	(r255331)
@@ -254,30 +254,6 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enab
     0, "Is TLB Context ID enabled ?");
 int invpcid_works = 0;
 
-/*
- * Perform the guaranteed invalidation of all TLB entries.  This
- * includes the global entries, and entries in all PCIDs, not only the
- * current context.  The function works both on non-PCID CPUs and CPUs
- * with the PCID turned off or on.  See IA-32 SDM Vol. 3a 4.10.4.1
- * Operations that Invalidate TLBs and Paging-Structure Caches.
- */
-static __inline void
-invltlb_globpcid(void)
-{
-	uint64_t cr4;
-
-	cr4 = rcr4();
-	load_cr4(cr4 & ~CR4_PGE);
-	/*
-	 * Although preemption at this point could be detrimental to
-	 * performance, it would not lead to an error.  PG_G is simply
-	 * ignored if CR4.PGE is clear.  Moreover, in case this block
-	 * is re-entered, the load_cr4() either above or below will
-	 * modify CR4.PGE flushing the TLB.
-	 */
-	load_cr4(cr4 | CR4_PGE);
-}
-
 static int
 pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
 {

Modified: head/sys/amd64/include/cpu.h
==============================================================================
--- head/sys/amd64/include/cpu.h	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/amd64/include/cpu.h	Fri Sep  6 22:17:02 2013	(r255331)
@@ -54,6 +54,17 @@
 #define	TRAPF_PC(framep)	((framep)->tf_rip)
 
 #ifdef _KERNEL
+/*
+ * Struct containing pointers to CPU management functions whose
+ * implementation is run time selectable.  Selection can be made,
+ * for example, based on detection of a particular CPU variant or
+ * hypervisor environment.
+ */
+struct cpu_ops {
+	void (*ipi_vectored)(u_int, int);
+};
+
+extern struct	cpu_ops cpu_ops;
 extern char	btext[];
 extern char	etext[];
 

Modified: head/sys/amd64/include/cpufunc.h
==============================================================================
--- head/sys/amd64/include/cpufunc.h	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/amd64/include/cpufunc.h	Fri Sep  6 22:17:02 2013	(r255331)
@@ -461,6 +461,34 @@ invltlb(void)
 	load_cr3(rcr3());
 }
 
+#ifndef CR4_PGE
+#define	CR4_PGE	0x00000080	/* Page global enable */
+#endif
+
+/*
+ * Perform the guaranteed invalidation of all TLB entries.  This
+ * includes the global entries, and entries in all PCIDs, not only the
+ * current context.  The function works both on non-PCID CPUs and CPUs
+ * with the PCID turned off or on.  See IA-32 SDM Vol. 3a 4.10.4.1
+ * Operations that Invalidate TLBs and Paging-Structure Caches.
+ */
+static __inline void
+invltlb_globpcid(void)
+{
+	uint64_t cr4;
+
+	cr4 = rcr4();
+	load_cr4(cr4 & ~CR4_PGE);
+	/*
+	 * Although preemption at this point could be detrimental to
+	 * performance, it would not lead to an error.  PG_G is simply
+	 * ignored if CR4.PGE is clear.  Moreover, in case this block
+	 * is re-entered, the load_cr4() either above or below will
+	 * modify CR4.PGE flushing the TLB.
+	 */
+	load_cr4(cr4 | CR4_PGE);
+}
+
 /*
  * TLB flush for an individual page (even if it has PG_G).
  * Only works on 486+ CPUs (i386 does not have PG_G).

Modified: head/sys/amd64/include/pcpu.h
==============================================================================
--- head/sys/amd64/include/pcpu.h	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/amd64/include/pcpu.h	Fri Sep  6 22:17:02 2013	(r255331)
@@ -33,15 +33,6 @@
 #error "sys/cdefs.h is a prerequisite for this file"
 #endif
 
-#if defined(XEN) || defined(XENHVM)
-#ifndef NR_VIRQS
-#define	NR_VIRQS	24
-#endif
-#ifndef NR_IPIS
-#define	NR_IPIS		2
-#endif
-#endif
-
 /*
  * The SMP parts are setup in pmap.c and locore.s for the BSP, and
  * mp_machdep.c sets up the data for the AP's to "see" when they awake.

Modified: head/sys/i386/i386/mp_machdep.c
==============================================================================
--- head/sys/i386/i386/mp_machdep.c	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/i386/i386/mp_machdep.c	Fri Sep  6 22:17:02 2013	(r255331)
@@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/psl.h>
 #include <machine/smp.h>
 #include <machine/specialreg.h>
+#include <machine/cpu.h>
 
 #ifdef XENHVM
 #include <xen/hvm.h>
@@ -170,6 +171,11 @@ u_long *ipi_lazypmap_counts[MAXCPU];
 static u_long *ipi_hardclock_counts[MAXCPU];
 #endif
 
+/* Default cpu_ops implementation. */
+struct cpu_ops cpu_ops = {
+	.ipi_vectored = lapic_ipi_vectored
+};
+
 /*
  * Local data and functions.
  */
@@ -1209,7 +1215,7 @@ ipi_send_cpu(int cpu, u_int ipi)
 		if (old_pending)
 			return;
 	}
-	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+	cpu_ops.ipi_vectored(ipi, cpu_apic_ids[cpu]);
 }
 
 /*
@@ -1460,7 +1466,7 @@ ipi_all_but_self(u_int ipi)
 		CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
-	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
+	cpu_ops.ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 }
 
 int

Modified: head/sys/i386/include/cpu.h
==============================================================================
--- head/sys/i386/include/cpu.h	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/i386/include/cpu.h	Fri Sep  6 22:17:02 2013	(r255331)
@@ -54,6 +54,17 @@
 #define	TRAPF_PC(framep)	((framep)->tf_eip)
 
 #ifdef _KERNEL
+/*
+ * Struct containing pointers to CPU management functions whose
+ * implementation is run time selectable.  Selection can be made,
+ * for example, based on detection of a particular CPU variant or
+ * hypervisor environment.
+ */
+struct cpu_ops {
+	void (*ipi_vectored)(u_int, int);
+};
+
+extern struct	cpu_ops cpu_ops;
 extern char	btext[];
 extern char	etext[];
 

Modified: head/sys/i386/include/pcpu.h
==============================================================================
--- head/sys/i386/include/pcpu.h	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/i386/include/pcpu.h	Fri Sep  6 22:17:02 2013	(r255331)
@@ -44,15 +44,6 @@
  * other processors"
  */
 
-#if defined(XEN) || defined(XENHVM)
-#ifndef NR_VIRQS
-#define	NR_VIRQS	24
-#endif
-#ifndef NR_IPIS
-#define	NR_IPIS		2
-#endif
-#endif
-
 #if defined(XEN)
 
 /* These are peridically updated in shared_info, and then copied here. */

Modified: head/sys/i386/include/smp.h
==============================================================================
--- head/sys/i386/include/smp.h	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/i386/include/smp.h	Fri Sep  6 22:17:02 2013	(r255331)
@@ -84,11 +84,6 @@ void	smp_masked_invltlb(cpuset_t mask);
 
 #ifdef XEN
 void ipi_to_irq_init(void);
-
-#define RESCHEDULE_VECTOR	0
-#define CALL_FUNCTION_VECTOR	1
-#define NR_IPIS			2
-
 #endif
 #endif /* !LOCORE */
 #endif /* SMP */

Modified: head/sys/i386/xen/mp_machdep.c
==============================================================================
--- head/sys/i386/xen/mp_machdep.c	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/i386/xen/mp_machdep.c	Fri Sep  6 22:17:02 2013	(r255331)
@@ -99,25 +99,37 @@ extern void failsafe_callback(void);
 extern void pmap_lazyfix_action(void);
 
 /*--------------------------- Forward Declarations ---------------------------*/
-static void	assign_cpu_ids(void);
-static void	set_interrupt_apic_ids(void);
-static int	start_all_aps(void);
-static int	start_ap(int apic_id);
-static void	release_aps(void *dummy);
+static driver_filter_t	smp_reschedule_interrupt;
+static driver_filter_t	smp_call_function_interrupt;
+static void		assign_cpu_ids(void);
+static void		set_interrupt_apic_ids(void);
+static int		start_all_aps(void);
+static int		start_ap(int apic_id);
+static void		release_aps(void *dummy);
+
+/*---------------------------------- Macros ----------------------------------*/
+#define	IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
 
 /*-------------------------------- Local Types -------------------------------*/
 typedef void call_data_func_t(uintptr_t , uintptr_t);
 
-/*
- * Store data from cpu_add() until later in the boot when we actually setup
- * the APs.
- */
 struct cpu_info {
 	int	cpu_present:1;
 	int	cpu_bsp:1;
 	int	cpu_disabled:1;
 };
 
+struct xen_ipi_handler
+{
+	driver_filter_t	*filter;
+	const char	*description;
+};
+
+enum {
+	RESCHEDULE_VECTOR,
+	CALL_FUNCTION_VECTOR,
+};
+
 /*-------------------------------- Global Data -------------------------------*/
 static u_int	hyperthreading_cpus;
 static cpuset_t	hyperthreading_cpus_mask;
@@ -161,8 +173,14 @@ static volatile u_int cpu_ipi_pending[MA
 static int cpu_logical;
 static int cpu_cores;
 
+static const struct xen_ipi_handler xen_ipis[] = 
+{
+	[RESCHEDULE_VECTOR]	= { smp_reschedule_interrupt,	"resched"  },
+	[CALL_FUNCTION_VECTOR]	= { smp_call_function_interrupt,"callfunc" }
+};
+
 /*------------------------------- Per-CPU Data -------------------------------*/
-DPCPU_DEFINE(xen_intr_handle_t, ipi_port[NR_IPIS]);
+DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
 DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
 
 /*------------------------------ Implementation ------------------------------*/
@@ -362,7 +380,7 @@ iv_lazypmap(uintptr_t a, uintptr_t b)
 /*
  * These start from "IPI offset" APIC_IPI_INTS
  */
-static call_data_func_t *ipi_vectors[] = 
+static call_data_func_t *ipi_vectors[6] = 
 {
 	iv_rendezvous,
 	iv_invltlb,
@@ -427,7 +445,7 @@ smp_call_function_interrupt(void *unused
 	    call_data->func_id > IPI_BITMAP_VECTOR)
 		panic("invalid function id %u", call_data->func_id);
 	
-	func = ipi_vectors[call_data->func_id - APIC_IPI_INTS];
+	func = ipi_vectors[IPI_TO_IDX(call_data->func_id)];
 	/*
 	 * Notify initiating CPU that I've grabbed the data and am
 	 * about to execute the function
@@ -473,44 +491,43 @@ cpu_mp_announce(void)
 static int
 xen_smp_cpu_init(unsigned int cpu)
 {
-	int rc;
-	xen_intr_handle_t irq_handle;
+	xen_intr_handle_t *ipi_handle;
+	const struct xen_ipi_handler *ipi;
+	int idx, rc;
 
-	DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], NULL);
-	DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], NULL);
+	ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
+	for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
 
-	/*
-	 * The PCPU variable pc_device is not initialized on i386 PV,
-	 * so we have to use the root_bus device in order to setup
-	 * the IPIs.
-	 */
-	rc = xen_intr_bind_ipi(root_bus, RESCHEDULE_VECTOR,
-	    cpu, smp_reschedule_interrupt, INTR_TYPE_TTY, &irq_handle);
-	if (rc < 0)
-		goto fail;
-	xen_intr_describe(irq_handle, "resched%u", cpu);
-	DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], irq_handle);
-
-	printf("[XEN] IPI cpu=%d port=%d vector=RESCHEDULE_VECTOR (%d)\n",
-	    cpu, xen_intr_port(irq_handle), RESCHEDULE_VECTOR);
-
-	rc = xen_intr_bind_ipi(root_bus, CALL_FUNCTION_VECTOR,
-	    cpu, smp_call_function_interrupt, INTR_TYPE_TTY, &irq_handle);
-	if (rc < 0)
-		goto fail;
-	xen_intr_describe(irq_handle, "callfunc%u", cpu);
-	DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], irq_handle);
+		/*
+		 * The PCPU variable pc_device is not initialized on i386 PV,
+		 * so we have to use the root_bus device in order to setup
+		 * the IPIs.
+		 */
+		rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu,
+		    ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]);
+		if (rc != 0) {
+			printf("Unable to allocate a XEN IPI port. "
+			    "Error %d\n", rc);
+			break;
+		}
+		xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
+	}
 
-	printf("[XEN] IPI cpu=%d port=%d vector=CALL_FUNCTION_VECTOR (%d)\n",
-	    cpu, xen_intr_port(irq_handle), CALL_FUNCTION_VECTOR);
+	for (;idx < nitems(xen_ipis); idx++)
+		    ipi_handle[idx] = NULL;
 
-	return (0);
+	if (rc == 0)
+		return (0);
+
+	/* Either all are successfully mapped, or none at all. */
+	for (idx = 0; idx < nitems(xen_ipis); idx++) {
+		if (ipi_handle[idx] == NULL)
+			continue;
+
+		xen_intr_unbind(ipi_handle[idx]);
+		ipi_handle[idx] = NULL;
+	}
 
- fail:
-	xen_intr_unbind(DPCPU_ID_GET(cpu, ipi_port[RESCHEDULE_VECTOR]));
-	DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], NULL);
-	xen_intr_unbind(DPCPU_ID_GET(cpu, ipi_port[CALL_FUNCTION_VECTOR]));
-	DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], NULL);
 	return (rc);
 }
 
@@ -980,8 +997,8 @@ start_ap(int apic_id)
 static void
 ipi_pcpu(int cpu, u_int ipi)
 {
-	KASSERT((ipi <= NR_IPIS), ("invalid IPI"));
-	xen_intr_signal(DPCPU_ID_GET(cpu, ipi_port[ipi]));
+	KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI"));
+	xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi]));
 }
 
 /*

Modified: head/sys/x86/xen/hvm.c
==============================================================================
--- head/sys/x86/xen/hvm.c	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/x86/xen/hvm.c	Fri Sep  6 22:17:02 2013	(r255331)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008 Citrix Systems, Inc.
+ * Copyright (c) 2008, 2013 Citrix Systems, Inc.
  * Copyright (c) 2012 Spectra Logic Corporation
  * All rights reserved.
  *
@@ -33,9 +33,19 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
 
 #include <dev/pci/pcivar.h>
+
 #include <machine/cpufunc.h>
+#include <machine/cpu.h>
+#include <machine/smp.h>
+
+#include <x86/apicreg.h>
 
 #include <xen/xen-os.h>
 #include <xen/features.h>
@@ -44,30 +54,450 @@ __FBSDID("$FreeBSD$");
 #include <xen/hvm.h>
 #include <xen/xen_intr.h>
 
-#include <vm/vm.h>
-#include <vm/pmap.h>
-
 #include <xen/interface/hvm/params.h>
 #include <xen/interface/vcpu.h>
 
-static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
+/*--------------------------- Forward Declarations ---------------------------*/
+static driver_filter_t xen_smp_rendezvous_action;
+static driver_filter_t xen_invltlb;
+static driver_filter_t xen_invlpg;
+static driver_filter_t xen_invlrng;
+static driver_filter_t xen_invlcache;
+#ifdef __i386__
+static driver_filter_t xen_lazypmap;
+#endif
+static driver_filter_t xen_ipi_bitmap_handler;
+static driver_filter_t xen_cpustop_handler;
+static driver_filter_t xen_cpususpend_handler;
+static driver_filter_t xen_cpustophard_handler;
+
+/*---------------------------- Extern Declarations ---------------------------*/
+/* Variables used by mp_machdep to perform the MMU related IPIs */
+extern volatile int smp_tlb_wait;
+extern vm_offset_t smp_tlb_addr2;
+#ifdef __i386__
+extern vm_offset_t smp_tlb_addr1;
+#else
+extern struct invpcid_descr smp_tlb_invpcid;
+extern uint64_t pcid_cr3;
+extern int invpcid_works;
+extern int pmap_pcid_enabled;
+extern pmap_t smp_tlb_pmap;
+#endif
+
+#ifdef __i386__
+extern void pmap_lazyfix_action(void);
+#endif
 
-DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
-DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
+/*---------------------------------- Macros ----------------------------------*/
+#define	IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
+
+/*-------------------------------- Local Types -------------------------------*/
+struct xen_ipi_handler
+{
+	driver_filter_t	*filter;
+	const char	*description;
+};
 
 /*-------------------------------- Global Data -------------------------------*/
+enum xen_domain_type xen_domain_type = XEN_NATIVE;
+
+static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
+
+static struct xen_ipi_handler xen_ipis[] = 
+{
+	[IPI_TO_IDX(IPI_RENDEZVOUS)]	= { xen_smp_rendezvous_action,	"r"   },
+	[IPI_TO_IDX(IPI_INVLTLB)]	= { xen_invltlb,		"itlb"},
+	[IPI_TO_IDX(IPI_INVLPG)]	= { xen_invlpg,			"ipg" },
+	[IPI_TO_IDX(IPI_INVLRNG)]	= { xen_invlrng,		"irg" },
+	[IPI_TO_IDX(IPI_INVLCACHE)]	= { xen_invlcache,		"ic"  },
+#ifdef __i386__
+	[IPI_TO_IDX(IPI_LAZYPMAP)]	= { xen_lazypmap,		"lp"  },
+#endif
+	[IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler,	"b"   },
+	[IPI_TO_IDX(IPI_STOP)]		= { xen_cpustop_handler,	"st"  },
+	[IPI_TO_IDX(IPI_SUSPEND)]	= { xen_cpususpend_handler,	"sp"  },
+	[IPI_TO_IDX(IPI_STOP_HARD)]	= { xen_cpustophard_handler,	"sth" },
+};
+
 /**
  * If non-zero, the hypervisor has been configured to use a direct
  * IDT event callback for interrupt injection.
  */
 int xen_vector_callback_enabled;
 
+/*------------------------------- Per-CPU Data -------------------------------*/
+DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
+DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
+DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
+
 /*------------------ Hypervisor Access Shared Memory Regions -----------------*/
 /** Hypercall table accessed via HYPERVISOR_*_op() methods. */
 char *hypercall_stubs;
 shared_info_t *HYPERVISOR_shared_info;
-enum xen_domain_type xen_domain_type = XEN_NATIVE;
 
+/*---------------------------- XEN PV IPI Handlers ---------------------------*/
+/*
+ * These are C clones of the ASM functions found in apic_vector.s
+ */
+static int
+xen_ipi_bitmap_handler(void *arg)
+{
+	struct trapframe *frame;
+
+	frame = arg;
+	ipi_bitmap_handler(*frame);
+	return (FILTER_HANDLED);
+}
+
+static int
+xen_smp_rendezvous_action(void *arg)
+{
+#ifdef COUNT_IPIS
+	int cpu;
+
+	cpu = PCPU_GET(cpuid);
+	(*ipi_rendezvous_counts[cpu])++;
+#endif /* COUNT_IPIS */
+
+	smp_rendezvous_action();
+	return (FILTER_HANDLED);
+}
+
+static int
+xen_invltlb(void *arg)
+{
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+	int cpu;
+
+	cpu = PCPU_GET(cpuid);
+#ifdef COUNT_XINVLTLB_HITS
+	xhits_gbl[cpu]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+	(*ipi_invltlb_counts[cpu])++;
+#endif /* COUNT_IPIS */
+#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
+
+	invltlb();
+	atomic_add_int(&smp_tlb_wait, 1);
+	return (FILTER_HANDLED);
+}
+
+#ifdef __amd64__
+static int
+xen_invltlb_pcid(void *arg)
+{
+	uint64_t cr3;
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+	int cpu;
+
+	cpu = PCPU_GET(cpuid);
+#ifdef COUNT_XINVLTLB_HITS
+	xhits_gbl[cpu]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+	(*ipi_invltlb_counts[cpu])++;
+#endif /* COUNT_IPIS */
+#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
+
+	cr3 = rcr3();
+	if (smp_tlb_invpcid.pcid != (uint64_t)-1 &&
+	    smp_tlb_invpcid.pcid != 0) {
+
+		if (invpcid_works) {
+			invpcid(&smp_tlb_invpcid, INVPCID_CTX);
+		} else {
+			/* Otherwise reload %cr3 twice. */
+			if (cr3 != pcid_cr3) {
+				load_cr3(pcid_cr3);
+				cr3 |= CR3_PCID_SAVE;
+			}
+			load_cr3(cr3);
+		}
+	} else {
+		invltlb_globpcid();
+	}
+	if (smp_tlb_pmap != NULL)
+		CPU_CLR_ATOMIC(PCPU_GET(cpuid), &smp_tlb_pmap->pm_save);
+
+	atomic_add_int(&smp_tlb_wait, 1);
+	return (FILTER_HANDLED);
+}
+#endif
+
+static int
+xen_invlpg(void *arg)
+{
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+	int cpu;
+
+	cpu = PCPU_GET(cpuid);
+#ifdef COUNT_XINVLTLB_HITS
+	xhits_pg[cpu]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+	(*ipi_invlpg_counts[cpu])++;
+#endif /* COUNT_IPIS */
+#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
+
+#ifdef __i386__
+	invlpg(smp_tlb_addr1);
+#else
+	invlpg(smp_tlb_invpcid.addr);
+#endif
+	atomic_add_int(&smp_tlb_wait, 1);
+	return (FILTER_HANDLED);
+}
+
+#ifdef __amd64__
+static int
+xen_invlpg_pcid(void *arg)
+{
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+	int cpu;
+
+	cpu = PCPU_GET(cpuid);
+#ifdef COUNT_XINVLTLB_HITS
+	xhits_pg[cpu]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+	(*ipi_invlpg_counts[cpu])++;
+#endif /* COUNT_IPIS */
+#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
+
+	if (invpcid_works) {
+		invpcid(&smp_tlb_invpcid, INVPCID_ADDR);
+	} else if (smp_tlb_invpcid.pcid == 0) {
+		invlpg(smp_tlb_invpcid.addr);
+	} else if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
+		invltlb_globpcid();
+	} else {
+		uint64_t cr3;
+
+		/*
+		 * PCID supported, but INVPCID is not.
+		 * Temporarily switch to the target address
+		 * space and do INVLPG.
+		 */
+		cr3 = rcr3();
+		if (cr3 != pcid_cr3)
+			load_cr3(pcid_cr3 | CR3_PCID_SAVE);
+		invlpg(smp_tlb_invpcid.addr);
+		load_cr3(cr3 | CR3_PCID_SAVE);
+	}
+
+	atomic_add_int(&smp_tlb_wait, 1);
+	return (FILTER_HANDLED);
+}
+#endif
+
+static inline void
+invlpg_range(vm_offset_t start, vm_offset_t end)
+{
+	do {
+		invlpg(start);
+		start += PAGE_SIZE;
+	} while (start < end);
+}
+
+static int
+xen_invlrng(void *arg)
+{
+	vm_offset_t addr;
+#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS)
+	int cpu;
+
+	cpu = PCPU_GET(cpuid);
+#ifdef COUNT_XINVLTLB_HITS
+	xhits_rng[cpu]++;
+#endif /* COUNT_XINVLTLB_HITS */
+#ifdef COUNT_IPIS
+	(*ipi_invlrng_counts[cpu])++;
+#endif /* COUNT_IPIS */
+#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */
+
+#ifdef __i386__
+	addr = smp_tlb_addr1;
+	invlpg_range(addr, smp_tlb_addr2);
+#else
+	addr = smp_tlb_invpcid.addr;
+	if (pmap_pcid_enabled) {
+		if (invpcid_works) {
+			struct invpcid_descr d;
+
+			d = smp_tlb_invpcid;
+			do {
+				invpcid(&d, INVPCID_ADDR);
+				d.addr += PAGE_SIZE;
+			} while (d.addr < smp_tlb_addr2);
+		} else if (smp_tlb_invpcid.pcid == 0) {
+			/*
+			 * kernel pmap - use invlpg to invalidate
+			 * global mapping.
+			 */
+			invlpg_range(addr, smp_tlb_addr2);
+		} else if (smp_tlb_invpcid.pcid != (uint64_t)-1) {
+			invltlb_globpcid();
+			if (smp_tlb_pmap != NULL) {
+				CPU_CLR_ATOMIC(PCPU_GET(cpuid),
+				    &smp_tlb_pmap->pm_save);
+			}
+		} else {
+			uint64_t cr3;
+
+			cr3 = rcr3();
+			if (cr3 != pcid_cr3)
+				load_cr3(pcid_cr3 | CR3_PCID_SAVE);
+			invlpg_range(addr, smp_tlb_addr2);
+			load_cr3(cr3 | CR3_PCID_SAVE);
+		}
+	} else {
+		invlpg_range(addr, smp_tlb_addr2);
+	}
+#endif
+
+	atomic_add_int(&smp_tlb_wait, 1);
+	return (FILTER_HANDLED);
+}
+
+static int
+xen_invlcache(void *arg)
+{
+#ifdef COUNT_IPIS
+	int cpu = PCPU_GET(cpuid);
+
+	cpu = PCPU_GET(cpuid);
+	(*ipi_invlcache_counts[cpu])++;
+#endif /* COUNT_IPIS */
+
+	wbinvd();
+	atomic_add_int(&smp_tlb_wait, 1);
+	return (FILTER_HANDLED);
+}
+
+#ifdef __i386__
+static int
+xen_lazypmap(void *arg)
+{
+
+	pmap_lazyfix_action();
+	return (FILTER_HANDLED);
+}
+#endif
+
+static int
+xen_cpustop_handler(void *arg)
+{
+
+	cpustop_handler();
+	return (FILTER_HANDLED);
+}
+
+static int
+xen_cpususpend_handler(void *arg)
+{
+
+	cpususpend_handler();
+	return (FILTER_HANDLED);
+}
+
+static int
+xen_cpustophard_handler(void *arg)
+{
+
+	ipi_nmi_handler();
+	return (FILTER_HANDLED);
+}
+
+/* Xen PV IPI sender */
+static void
+xen_ipi_vectored(u_int vector, int dest)
+{
+	xen_intr_handle_t *ipi_handle;
+	int ipi_idx, to_cpu, self;
+
+	ipi_idx = IPI_TO_IDX(vector);
+	if (ipi_idx > nitems(xen_ipis))
+		panic("IPI out of range");
+
+	switch(dest) {
+	case APIC_IPI_DEST_SELF:
+		ipi_handle = DPCPU_GET(ipi_handle);
+		xen_intr_signal(ipi_handle[ipi_idx]);
+		break;
+	case APIC_IPI_DEST_ALL:
+		CPU_FOREACH(to_cpu) {
+			ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
+			xen_intr_signal(ipi_handle[ipi_idx]);
+		}
+		break;
+	case APIC_IPI_DEST_OTHERS:
+		self = PCPU_GET(cpuid);
+		CPU_FOREACH(to_cpu) {
+			if (to_cpu != self) {
+				ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
+				xen_intr_signal(ipi_handle[ipi_idx]);
+			}
+		}
+		break;
+	default:
+		to_cpu = apic_cpuid(dest);
+		ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
+		xen_intr_signal(ipi_handle[ipi_idx]);
+		break;
+	}
+}
+
+static void
+xen_cpu_ipi_init(int cpu)
+{
+	xen_intr_handle_t *ipi_handle;
+	const struct xen_ipi_handler *ipi;
+	device_t dev;
+	int idx, rc;
+
+	ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
+	dev = pcpu_find(cpu)->pc_device;
+	KASSERT((dev != NULL), ("NULL pcpu device_t"));
+
+	for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
+
+		if (ipi->filter == NULL) {
+			ipi_handle[idx] = NULL;
+			continue;
+		}
+
+		rc = xen_intr_alloc_and_bind_ipi(dev, cpu, ipi->filter,
+		    INTR_TYPE_TTY, &ipi_handle[idx]);
+		if (rc != 0)
+			panic("Unable to allocate a XEN IPI port");
+		xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
+	}
+}
+
+static void
+xen_init_ipis(void)
+{
+	int i;
+
+	if (!xen_hvm_domain() || !xen_vector_callback_enabled)
+		return;
+
+#ifdef __amd64__
+	if (pmap_pcid_enabled) {
+		xen_ipis[IPI_TO_IDX(IPI_INVLTLB)].filter = xen_invltlb_pcid;
+		xen_ipis[IPI_TO_IDX(IPI_INVLPG)].filter = xen_invlpg_pcid;
+	}
+#endif
+	CPU_FOREACH(i)
+		xen_cpu_ipi_init(i);
+
+	/* Set the xen pv ipi ops to replace the native ones */
+	cpu_ops.ipi_vectored = xen_ipi_vectored;
+}
+
+/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
 static uint32_t
 xen_hvm_cpuid_base(void)
 {
@@ -253,4 +683,5 @@ void xen_hvm_init_cpu(void)
 }
 
 SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL);
+SYSINIT(xen_init_ipis, SI_SUB_SMP, SI_ORDER_FIRST, xen_init_ipis, NULL);
 SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL);

Modified: head/sys/x86/xen/xen_intr.c
==============================================================================
--- head/sys/x86/xen/xen_intr.c	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/x86/xen/xen_intr.c	Fri Sep  6 22:17:02 2013	(r255331)
@@ -1010,7 +1010,7 @@ xen_intr_bind_virq(device_t dev, u_int v
 }
 
 int
-xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu,
+xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu,
     driver_filter_t filter, enum intr_type flags,
     xen_intr_handle_t *port_handlep)
 {

Modified: head/sys/xen/xen_intr.h
==============================================================================
--- head/sys/xen/xen_intr.h	Fri Sep  6 21:26:36 2013	(r255330)
+++ head/sys/xen/xen_intr.h	Fri Sep  6 22:17:02 2013	(r255331)
@@ -141,21 +141,20 @@ int xen_intr_bind_virq(device_t dev, u_i
 	void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep);
 
 /**
- * Associate an interprocessor interrupt vector with an interrupt handler.
+ * Allocate a local event channel port for servicing interprocessor
+ * interrupts and, if successful, associate the port with the specified
+ * interrupt handler.
  *
  * \param dev       The device making this bind request.
- * \param ipi       The interprocessor interrupt vector number of the
- *                  interrupt source being hooked.
  * \param cpu       The cpu receiving the IPI.
- * \param filter    An interrupt filter handler.  Specify NULL
- *                  to always dispatch to the ithread handler.
+ * \param filter    The interrupt filter servicing this IPI.
  * \param irqflags  Interrupt handler flags.  See sys/bus.h.
  * \param handlep   Pointer to an opaque handle used to manage this
  *                  registration.
  *
  * \returns  0 on success, otherwise an errno.
  */
-int xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu,
+int xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu,
 	driver_filter_t filter, enum intr_type irqflags,
 	xen_intr_handle_t *handlep);
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201309062217.r86MH2M8028267>