Date:      Sun, 18 Apr 2010 04:10:47 +0530
From:      "C. Jayachandran" <c.jayachandran@gmail.com>
To:        Randall Stewart <rrs@lakerest.net>
Cc:        freebsd-mips@freebsd.org
Subject:   SMP support for XLR processors.
Message-ID:  <w2z98a59be81004171540t2f0d5193nca2ec9e2540502e2@mail.gmail.com>


[-- Attachment #1 --]
I have a set of initial patches to enable SMP for the RMI XLR processors. The
system comes up to multi-user with 32 CPUs. I could do a buildworld before I
updated to HEAD; with HEAD there is a hang during buildworld which I am still
looking into, but I think the initial work can be checked in.

Neel, can you have a look at the first two patches? One enables the ULE
scheduler for MIPS, and the other moves platform_init_ap to slightly later
in the AP initialization sequence.

The patches are:
1. mips-ule-support.patch
- Enable the ULE scheduler for MIPS.

2. mips-smp-move-platform.patch
- We need a hook to set up the message ring and its interrupts; we use
platform_init_ap for this, and move it so that it is called later in the
AP startup for XLR (outlined in the sketch after this item).
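
In outline, the change makes smp_init_secondary() call the platform hook for
XLR/XLS instead of unmasking the clock/IPI interrupts directly. This is only a
sketch; the real change is the mpboot.S and mp_machdep.c hunks in the attached
patch:

void
smp_init_secondary(u_int32_t cpuid)
{
	/* ... TLB, pcpu and idle-thread setup as before ... */
#ifndef TARGET_XLR_XLS
	/* generic MIPS: unmask the clock and IPI interrupts directly */
	clock_int_mask = hard_int_mask(5);
	ipi_int_mask = hard_int_mask(platform_ipi_intrnum());
	set_intr_mask(ALL_INT_MASK & ~(ipi_int_mask | clock_int_mask));
#else
	/* XLR/XLS: the hook programs the message ring and its interrupt */
	platform_init_ap(cpuid);
#endif
	/* ... bootstrap the compare register and enter the scheduler ... */
}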

3. rmi-prid.patch
Add the RMI processor ID to the CPU identification; right now it prints an
unknown processor.

4. rmi-pcib-fix.patch
The XLR PCI bridge should hang off the IODI bus. Currently it is off the
nexus bus, which causes problems when bus_setup_intr is called on nexus
(the fix boils down to the two lines shown after this item).
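
The essence of the fix (taken from the attached diff) is just to attach the
bridge as a child of iodi:

/* iodi.c: enumerate the PCI bridge on the IODI bus */
device_add_child(dev, "pcib", 0);

/* xlr_pci.c: register the pcib driver against iodi instead of nexus */
DRIVER_MODULE(pcib, iodi, xlr_pcib_driver, pcib_devclass, 0, 0);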

5. cleanup-reorg-cpuid-rge-kx.patch
This set has many changes rolled up:
 - clean up the rge driver, removing unused code paths and commented-out
code (there is still a long way to go)
 - update xlr_cpu_id(), xlr_core_id() and xlr_thr_id(), and fix their users
 - fix a bug: we cannot use MIPS_PHYS_TO_KSEG0 on the physical addresses
used here, so I have changed the code to do the same accesses through XKPHYS
 - enable the KX bit and disable interrupts before the physical memory is
read, so that the KX setting does not affect other code (see the usage
sketch after this list)
 - move the message ring code from xlr_machdep.c to on_chip.c, so that all
message ring code is now in on_chip.c
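
For reference, the access pattern the rge driver now uses, for some 40-bit
physical address paddr, looks roughly like this (the helpers xlr_enable_kx()
and xlr_paddr_lw() are the ones added in this patch):

	uint32_t sr, word;

	/* set Status.KX and clear Status.IE so the 64-bit XKPHYS load is
	 * legal and nothing else runs while KX is set */
	sr = xlr_enable_kx();
	/* 32-bit load from the 40-bit physical address through XKPHYS */
	word = xlr_paddr_lw(paddr);
	/* restore the previous Status register (KX and interrupt state) */
	mips_wr_status(sr);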

6. rmi-xlr-smp.patch
SMP support for XLR:
- add mpwait.S to get the other CPUs out of the boot loader code at startup
(the release handshake is sketched after this item).
- add the SMP platform support functions.
- disable the shared-TLB code for SMP, since the threads have different
mappings.
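
The startup handshake works roughly as follows: the application processors
spin in mpwait on a per-hardware-thread release word, and platform_start_ap()
releases them once the kernel is ready. A C sketch of what the attached code
does:

/* xlr_machdep.c: one release flag per hardware thread */
int xlr_ap_release[MAXCPU];

int
platform_start_ap(int cpuid)
{
	/* the AP is already spinning in mpwait; just release it */
	atomic_store_rel_int(&xlr_ap_release[cpuid], 1);
	return (0);
}

/*
 * mpwait.S is the assembly equivalent of:
 *
 *	while (xlr_ap_release[my_hw_thread_id] == 0)
 *		;	// spin, with nops so core 0 threads are not starved
 *	mpentry();
 */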

The patches are also available at
http://sites.google.com/site/cjayachandran/files

Enabling all 32 threads will need a minor fix in the generic SMP code; see
the patch subr_smp-fix.patch at the same place (the subr_smp.c diff at the
end of this mail).
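
The problem is the CPU group mask computation in subr_smp.c: with all 32
threads online, (1 << mp_ncpus) - 1 shifts the 32-bit mask by its full width,
which is undefined in C, so the all-CPUs case has to be handled explicitly.
That is all the fix does:

	if (mp_ncpus == sizeof(top->cg_mask) * 8)
		top->cg_mask = -1;		/* all 32 CPUs online */
	else
		top->cg_mask = (1 << mp_ncpus) - 1;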

Regards,
JC.

[-- Attachment #2 --]
Index: sys/kern/sched_ule.c
===================================================================
--- sys/kern/sched_ule.c	(revision 206735)
+++ sys/kern/sched_ule.c	(working copy)
@@ -80,7 +80,7 @@
 #include <machine/cpu.h>
 #include <machine/smp.h>
 
-#if defined(__sparc64__) || defined(__mips__)
+#if defined(__sparc64__)
 #error "This architecture is not currently compatible with ULE"
 #endif
 
Index: sys/mips/include/smp.h
===================================================================
--- sys/mips/include/smp.h	(revision 206735)
+++ sys/mips/include/smp.h	(working copy)
@@ -26,6 +26,7 @@
 #define	IPI_AST			0x0004
 #define	IPI_STOP		0x0008
 #define	IPI_STOP_HARD		0x0008
+#define	IPI_PREEMPT		0x0010
 
 #ifndef LOCORE
 
Index: sys/mips/mips/mp_machdep.c
===================================================================
--- sys/mips/mips/mp_machdep.c	(revision 206735)
+++ sys/mips/mips/mp_machdep.c	(working copy)
@@ -141,6 +141,10 @@
 			atomic_clear_int(&stopped_cpus, cpumask);
 			CTR0(KTR_SMP, "IPI_STOP (restart)");
 			break;
+		case IPI_PREEMPT:
+			CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
+			sched_preempt(curthread);
+			break;
 		default:
 			panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu);
 		}

[-- Attachment #3 --]
Index: sys/mips/mips/mpboot.S
===================================================================
--- sys/mips/mips/mpboot.S	(revision 206735)
+++ sys/mips/mips/mpboot.S	(working copy)
@@ -76,9 +76,10 @@
 
 	PTR_LA	gp, _C_LABEL(_gp)
 
+#ifndef TARGET_XLR_XLS
 	jal	platform_init_ap
 	move	a0, s0
-
+#endif
 	jal	smp_init_secondary
 	move	a0, s0
 
Index: sys/mips/mips/mp_machdep.c
===================================================================
--- sys/mips/mips/mp_machdep.c	(revision 206735)
+++ sys/mips/mips/mp_machdep.c	(working copy)
@@ -234,7 +238,9 @@
 void
 smp_init_secondary(u_int32_t cpuid)
 {
+#ifndef TARGET_XLR_XLS
 	int ipi_int_mask, clock_int_mask;
+#endif
 
 	/* TLB */
 	Mips_SetWIRED(0);
@@ -288,12 +294,16 @@
 	while (smp_started == 0)
 		; /* nothing */
 
+#ifndef TARGET_XLR_XLS
 	/*
 	 * Unmask the clock and ipi interrupts.
 	 */
 	clock_int_mask = hard_int_mask(5);
 	ipi_int_mask = hard_int_mask(platform_ipi_intrnum());
 	set_intr_mask(ALL_INT_MASK & ~(ipi_int_mask | clock_int_mask));
+#else
+	platform_init_ap(cpuid);
+#endif
 
 	/*
 	 * Bootstrap the compare register.

[-- Attachment #4 --]
Index: sys/mips/include/locore.h
===================================================================
--- sys/mips/include/locore.h	(revision 206735)
+++ sys/mips/include/locore.h	(working copy)
@@ -60,6 +60,7 @@
 				/*	0x09	unannounced */
 				/*	0x0a	unannounced */
 #define     MIPS_PRID_CID_LEXRA		0x0b	/* Lexra */
+#define     MIPS_PRID_CID_RMI		0x0c	/* RMI */
 #define     MIPS_PRID_CID_CAVIUM	0x0d	/* Cavium */
 #define MIPS_PRID_COPTS(x)	(((x) >> 24) & 0x00ff)	/* Company Options */
 
Index: sys/mips/mips/cpu.c
===================================================================
--- sys/mips/mips/cpu.c	(revision 206735)
+++ sys/mips/mips/cpu.c	(working copy)
@@ -178,6 +178,9 @@
 	case MIPS_PRID_CID_LEXRA:
 		printf("Lexra");
 		break;
+	case MIPS_PRID_CID_RMI:
+		printf("RMI");
+		break;
 	case MIPS_PRID_CID_CAVIUM:
 		printf("Cavium");
 		break;

[-- Attachment #5 --]
Index: sys/mips/rmi/xlr_pci.c
===================================================================
--- sys/mips/rmi/xlr_pci.c	(revision 206735)
+++ sys/mips/rmi/xlr_pci.c	(working copy)
@@ -636,4 +636,4 @@
 	sizeof(struct xlr_pcib_softc),
 };
 
-DRIVER_MODULE(pcib, nexus, xlr_pcib_driver, pcib_devclass, 0, 0);
+DRIVER_MODULE(pcib, iodi, xlr_pcib_driver, pcib_devclass, 0, 0);
Index: sys/mips/rmi/iodi.c
===================================================================
--- sys/mips/rmi/iodi.c	(revision 206735)
+++ sys/mips/rmi/iodi.c	(working copy)
@@ -223,6 +223,7 @@
 	 */
 	device_add_child(dev, "uart", 0);
 	device_add_child(dev, "xlr_i2c", 0);
+	device_add_child(dev, "pcib", 0);
 
 	if (xlr_board_info.usb)
 		device_add_child(dev, "ehci", 0);

[-- Attachment #6 --]
Index: sys/mips/rmi/dev/xlr/rge.c
===================================================================
--- sys/mips/rmi/dev/xlr/rge.c	(revision 206735)
+++ sys/mips/rmi/dev/xlr/rge.c	(working copy)
@@ -83,6 +83,7 @@
 #include <machine/param.h>
 #include <machine/intr_machdep.h>
 #include <machine/clock.h>	/* for DELAY */
+#include <machine/cpuregs.h>
 #include <machine/bus.h>	/* */
 #include <machine/resource.h>
 #include <mips/rmi/interrupt.h>
@@ -112,7 +113,6 @@
 MODULE_DEPEND(rge, miibus, 1, 1, 1);
 
 /* #define DEBUG */
-/*#define RX_COPY */
 
 #define RGE_TX_THRESHOLD 1024
 #define RGE_TX_Q_SIZE 1024
@@ -204,10 +204,18 @@
 	return value;
 }
 
+static __inline__ uint32_t 
+xlr_enable_kx(void)
+{
+	uint32_t sr = mips_rd_status();
+
+	mips_wr_status((sr & ~MIPS_SR_INT_IE) | MIPS_SR_KX);
+	return sr;
+}
+
 /* #define mac_stats_add(x, val) ({(x) += (val);}) */
 #define mac_stats_add(x, val) ldadd_wu(val, &x)
 
-
 #define XLR_MAX_CORE 8
 #define RGE_LOCK_INIT(_sc, _name) \
   mtx_init(&(_sc)->rge_mtx, _name, MTX_NETWORK_LOCK, MTX_DEF)
@@ -332,56 +340,6 @@
 #define STR(x) __STR(x)
 #endif
 
-#define XKPHYS        0x8000000000000000
-/* -- No longer needed RRS
-static __inline__ uint32_t
-lw_40bit_phys(uint64_t phys, int cca)
-{
-	uint64_t addr;
-	uint32_t value = 0;
-	unsigned long flags;
-
-	addr = XKPHYS | ((uint64_t) cca << 59) | (phys & 0xfffffffffcULL);
-
-	enable_KX(flags);
-	__asm__ __volatile__(
-	            ".set push\n"
-	            ".set noreorder\n"
-	            ".set mips64\n"
-	            "lw    %0, 0(%1) \n"
-	            ".set pop\n"
-	    :       "=r"(value)
-	    :       "r"(addr));
-
-	disable_KX(flags);
-	return value;
-}
-*/
-/* -- No longer used RRS
-static __inline__ uint64_t
-ld_40bit_phys(uint64_t phys, int cca)
-{
-	uint64_t addr;
-	uint64_t value = 0;
-	unsigned long flags;
-
-
-	addr = XKPHYS | ((uint64_t) cca << 59) | (phys & 0xfffffffffcULL);
-	enable_KX(flags);
-	__asm__ __volatile__(
-	            ".set push\n"
-	            ".set noreorder\n"
-	            ".set mips64\n"
-	            "ld    %0, 0(%1) \n"
-	            ".set pop\n"
-	    :       "=r"(value)
-	    :       "r"(addr));
-
-	disable_KX(flags);
-	return value;
-}
-*/
-
 void *xlr_tx_ring_mem;
 
 struct tx_desc_node {
@@ -449,7 +407,7 @@
 
 	for (i = 0; i < 32; i++) {
 		if (cpumask & (1 << i)) {
-			cpu = cpu_ltop_map[i];
+			cpu = i;
 			if (!active_core[cpu / 4]) {
 				active_core[cpu / 4] = 1;
 				xlr_total_active_core++;
@@ -507,7 +465,7 @@
 {
 	struct tx_desc_node *node;
 	struct p2d_tx_desc *tx_desc = NULL;
-	int cpu = xlr_cpu_id();
+	int cpu = xlr_core_id();
 
 	mtx_lock_spin(&tx_desc_lock[cpu]);
 	node = TAILQ_FIRST(&tx_frag_desc[cpu]);
@@ -527,7 +485,7 @@
 free_p2d_desc(struct p2d_tx_desc *tx_desc)
 {
 	struct tx_desc_node *node;
-	int cpu = xlr_cpu_id();
+	int cpu = xlr_core_id();
 
 	mtx_lock_spin(&tx_desc_lock[cpu]);
 	node = TAILQ_FIRST(&free_tx_frag_desc[cpu]);
@@ -553,7 +511,7 @@
 	vm_offset_t taddr;
 	uint64_t fr_stid;
 
-	fr_stid = (xlr_cpu_id() << 3) + xlr_thr_id() + 4;
+	fr_stid = (xlr_core_id() << 3) + xlr_thr_id() + 4;
 
 	if (tx_desc == NULL)
 		return 1;
@@ -620,21 +578,6 @@
 static void
 release_tx_desc(struct msgrng_msg *msg, int rel_buf)
 {
-	/*
-	 * OLD code: vm_paddr_t paddr = msg->msg0 & 0xffffffffffULL;
-	 * uint64_t temp; struct p2d_tx_desc *tx_desc; struct mbuf *m;
-	 * 
-	 * paddr += (XLR_MAX_TX_FRAGS * sizeof(uint64_t)); *** In o32 we will
-	 * crash here ****** temp = ld_40bit_phys(paddr, 3); tx_desc =
-	 * (struct p2d_tx_desc *)((vm_offset_t)temp);
-	 * 
-	 * if (rel_buf) { paddr += sizeof(uint64_t);
-	 * 
-	 * temp = ld_40bit_phys(paddr, 3);
-	 * 
-	 * m = (struct mbuf *)((vm_offset_t)temp); m_freem(m); } printf("Call
-	 * fre_p2d_desc\n"); free_p2d_desc(tx_desc);
-	 */
 	struct p2d_tx_desc *tx_desc, *chk_addr;
 	struct mbuf *m;
 
@@ -652,53 +595,7 @@
 	free_p2d_desc(tx_desc);
 }
 
-#ifdef RX_COPY
-#define RGE_MAX_NUM_DESC (6 * MAX_NUM_DESC)
-uint8_t *rge_rx_buffers[RGE_MAX_NUM_DESC];
-static struct mtx rge_rx_mtx;
-int g_rx_buf_head;
 
-static void
-init_rx_buf(void)
-{
-	int i;
-	uint8_t *buf, *start;
-	uint32_t size, *ptr;
-
-	mtx_init(&rge_rx_mtx, "xlr rx_desc", NULL, MTX_SPIN);
-
-	size = (RGE_MAX_NUM_DESC * (MAX_FRAME_SIZE + XLR_CACHELINE_SIZE));
-
-	start = (uint8_t *) contigmalloc(size, M_DEVBUF, M_NOWAIT | M_ZERO,
-	    0, 0xffffffff, XLR_CACHELINE_SIZE, 0);
-	if (start == NULL)
-		panic("NO RX BUFFERS");
-	buf = start;
-	size = (MAX_FRAME_SIZE + XLR_CACHELINE_SIZE);
-	for (i = 0; i < RGE_MAX_NUM_DESC; i++) {
-		buf = start + (i * size);
-		ptr = (uint32_t *) buf;
-		*ptr = (uint32_t) buf;
-		rge_rx_buffers[i] = buf + XLR_CACHELINE_SIZE;
-	}
-}
-
-static void *
-get_rx_buf(void)
-{
-	void *ptr = NULL;
-
-	mtx_lock_spin(&rge_rx_mtx);
-	if (g_rx_buf_head < RGE_MAX_NUM_DESC) {
-		ptr = (void *)rge_rx_buffers[g_rx_buf_head];
-		g_rx_buf_head++;
-	}
-	mtx_unlock_spin(&rge_rx_mtx);
-	return ptr;
-}
-
-#endif
-
 static struct mbuf *
 get_mbuf(void)
 {
@@ -716,23 +613,16 @@
 free_buf(vm_paddr_t paddr)
 {
 	struct mbuf *m;
-	uint32_t *temp;
-	uint32_t mag, um;
+	uint32_t mag, um, sr;
 
-	/*
-	 * This will crash I think. RRS temp = lw_40bit_phys((paddr -
-	 * XLR_CACHELINE_SIZE), 3); m = (struct mbuf *)temp;
-	 */
-	/*
-	 * This gets us a kseg0 address for the mbuf/magic on the ring but
-	 * we need to get the va to free the mbuf. This is stored at *temp;
-	 */
-	temp = (uint32_t *) MIPS_PHYS_TO_KSEG0(paddr - XLR_CACHELINE_SIZE);
-	um = temp[0];
-	mag = temp[1];
+	sr = xlr_enable_kx();
+	um = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE);
+	mag = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE + sizeof(uint32_t));
+	mips_wr_status(sr);
+
 	if (mag != 0xf00bad) {
-		printf("Something is wrong kseg:%p found mag:%x not 0xf00bad\n",
-		    temp, mag);
+		printf("Something is wrong kseg:%lx found mag:%x not 0xf00bad\n",
+		    (u_long)paddr, mag);
 		return;
 	}
 	m = (struct mbuf *)um;
@@ -743,19 +633,13 @@
 static void *
 get_buf(void)
 {
-#ifdef RX_COPY
-	return get_rx_buf();
-#else
 	struct mbuf *m_new = NULL;
-
+	unsigned int *md;
 #ifdef INVARIANTS
 	vm_paddr_t temp1, temp2;
-
 #endif
-	unsigned int *md;
 
 	m_new = get_mbuf();
-
 	if (m_new == NULL)
 		return NULL;
 
@@ -765,8 +649,6 @@
 	md[1] = 0xf00bad;
 	m_adj(m_new, XLR_CACHELINE_SIZE);
 
-
-	/* return (void *)m_new; */
 #ifdef INVARIANTS
 	temp1 = vtophys((vm_offset_t)m_new->m_data);
 	temp2 = vtophys((vm_offset_t)m_new->m_data + 1536);
@@ -774,7 +656,6 @@
 		panic("ALLOCED BUFFER IS NOT CONTIGUOUS\n");
 #endif
 	return (void *)m_new->m_data;
-#endif
 }
 
 /**********************************************************************
@@ -826,13 +707,13 @@
 {
 	int stid = priv->rfrbucket;
 	struct msgrng_msg msg;
-	int vcpu = (xlr_cpu_id() << 2) + xlr_thr_id();
+	int vcpu = xlr_cpu_id();
 
 	mac_make_desc_rfr(&msg, addr);
 
 	/* Send the packet to MAC */
-	dbg_msg("mac_%d: Sending free packet %llx to stid %d\n",
-	    priv->instance, addr, stid);
+	dbg_msg("mac_%d: Sending free packet %lx to stid %d\n",
+	    priv->instance, (u_long)addr, stid);
 	if (priv->type == XLR_XGMAC) {
 		while (message_send(1, MSGRNG_CODE_XGMAC, stid, &msg));
 	} else {
@@ -1088,7 +969,7 @@
 
 	for (i = 0; i < 32; i++) {
 		if (cpumask & (1 << i)) {
-			cpu = cpu_ltop_map[i];
+			cpu = i;
 			bucket = ((cpu >> 2) << 3);
 			//|(cpu & 0x03);
 			bucket_map |= (1ULL << bucket);
@@ -1613,10 +1494,7 @@
 static void
 mac_frin_replenish(void *args /* ignored */ )
 {
-#ifdef RX_COPY
-	return;
-#else
-	int cpu = xlr_cpu_id();
+	int cpu = xlr_core_id();
 	int done = 0;
 	int i = 0;
 
@@ -1685,7 +1563,6 @@
 		if (done == XLR_MAX_MACS)
 			break;
 	}
-#endif
 }
 
 static volatile uint32_t g_tx_frm_tx_ok=0;
@@ -1716,8 +1593,8 @@
 	struct rge_softc *sc = NULL;
 	struct driver_data *priv = 0;
 	struct ifnet *ifp;
-	int cpu = xlr_cpu_id();
-	int vcpu = (cpu << 2) + xlr_thr_id();
+	int vcpu = xlr_cpu_id();
+	int cpu = xlr_core_id();
 
 	dbg_msg("mac: bucket=%d, size=%d, code=%d, stid=%d, msg0=%llx msg1=%llx\n",
 	    bucket, size, code, stid, msg->msg0, msg->msg1);
@@ -2098,80 +1975,18 @@
 uint32_t gmac_rx_fail[32];
 uint32_t gmac_rx_pass[32];
 
-#ifdef RX_COPY
 static void
 rge_rx(struct rge_softc *sc, vm_paddr_t paddr, int len)
 {
-	/*
-	 * struct mbuf *m = (struct mbuf *)*(unsigned int *)((char *)addr -
-	 * XLR_CACHELINE_SIZE);
-	 */
 	struct mbuf *m;
-	void *ptr;
-	uint32_t *temp;
+	uint32_t tm, mag, sr;
 	struct ifnet *ifp = sc->rge_ifp;
-	unsigned long msgrng_flags;
-	int cpu = PCPU_GET(cpuid);
 
+	sr = xlr_enable_kx();
+	tm = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE);
+	mag = xlr_paddr_lw(paddr - XLR_CACHELINE_SIZE + sizeof(uint32_t));
+	mips_wr_status(sr);
 
-	temp = (uint32_t *) MIPS_PHYS_TO_KSEG0(paddr - XLR_CACHELINE_SIZE);
-
-	ptr = (void *)(temp + XLR_CACHELINE_SIZE);
-	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
-	if (m != NULL) {
-		m->m_len = m->m_pkthdr.len = MCLBYTES;
-		m_copyback(m, 0, len + BYTE_OFFSET, ptr);
-		/* align the data */
-		m->m_data += BYTE_OFFSET;
-		m->m_pkthdr.len = m->m_len = len;
-		m->m_pkthdr.rcvif = ifp;
-		gmac_rx_pass[cpu]++;
-	} else {
-		gmac_rx_fail[cpu]++;
-	}
-	msgrng_access_enable(msgrng_flags);
-	xlr_mac_send_fr(&sc->priv, paddr, MAX_FRAME_SIZE);
-	msgrng_access_disable(msgrng_flags);
-
-#ifdef DUMP_PACKETS
-	{
-		int i = 0;
-		unsigned char *buf = (char *)m->m_data;
-
-		printf("Rx Packet: length=%d\n", len);
-		for (i = 0; i < 64; i++) {
-			if (i && (i % 16) == 0)
-				printf("\n");
-			printf("%02x ", buf[i]);
-		}
-		printf("\n");
-	}
-#endif
-
-
-	if (m) {
-		ifp->if_ipackets++;
-		(*ifp->if_input) (ifp, m);
-	}
-}
-
-#else
-static void
-rge_rx(struct rge_softc *sc, vm_paddr_t paddr, int len)
-{
-	/*
-	 * struct mbuf *m = (struct mbuf *)*(unsigned int *)((char *)addr -
-	 * XLR_CACHELINE_SIZE);
-	 */
-	struct mbuf *m;
-	uint32_t *temp, tm, mag;
-
-	struct ifnet *ifp = sc->rge_ifp;
-
-
-	temp = (uint32_t *) MIPS_PHYS_TO_KSEG0(paddr - XLR_CACHELINE_SIZE);
-	tm = temp[0];
-	mag = temp[1];
 	m = (struct mbuf *)tm;
 	if (mag != 0xf00bad) {
 		/* somebody else packet Error - FIXME in intialization */
@@ -2201,8 +2016,6 @@
 	(*ifp->if_input) (ifp, m);
 }
 
-#endif
-
 static void
 rge_intr(void *arg)
 {
@@ -2268,8 +2081,8 @@
 	int prepend_pkt = 0;
 	int i = 0;
 	struct p2d_tx_desc *tx_desc = NULL;
-	int cpu = xlr_cpu_id();
-	uint32_t vcpu = (cpu << 2) + xlr_thr_id();
+	int cpu = xlr_core_id();
+	uint32_t vcpu = xlr_cpu_id();
 
 	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
 		return;
@@ -2716,9 +2529,6 @@
 {
 	init_p2d_allocation();
 	init_tx_ring();
-#ifdef RX_COPY
-	init_rx_buf();
-#endif
 
 	if (xlr_board_info.is_xls) {
 		if (register_msgring_handler(TX_STN_GMAC0,
Index: sys/mips/rmi/xlrconfig.h
===================================================================
--- sys/mips/rmi/xlrconfig.h	(revision 206735)
+++ sys/mips/rmi/xlrconfig.h	(working copy)
@@ -127,39 +127,35 @@
         :"$8", "$9");                                           \
    } while(0)
 
-#if 0
-#define xlr_processor_id()                                      \
+#define xlr_cpu_id()                                            \
 ({int __id;                                                     \
  __asm__ __volatile__ (                                         \
            ".set push\n"                                        \
            ".set noreorder\n"                                   \
-           ".word 0x40088007\n"                                 \
-           "srl  $8, $8, 10\n"                                  \
-           "andi %0, $8, 0x3f\n"                                \
+           "mfc0 $8, $15, 1\n"                                  \
+           "andi %0, $8, 0x1f\n"                                \
            ".set pop\n"                                         \
            : "=r" (__id) : : "$8");                             \
  __id;})
-#endif
 
-#define xlr_cpu_id()                                        \
+#define xlr_core_id()                                           \
 ({int __id;                                                     \
  __asm__ __volatile__ (                                         \
            ".set push\n"                                        \
            ".set noreorder\n"                                   \
-           ".word 0x40088007\n"                                 \
-           "srl  $8, $8, 4\n"                                   \
-           "andi %0, $8, 0x7\n"                                \
+           "mfc0 $8, $15, 1\n"                                  \
+           "andi %0, $8, 0x1f\n"                                \
            ".set pop\n"                                         \
            : "=r" (__id) : : "$8");                             \
- __id;})
+ __id/4;})
 
-#define xlr_thr_id()                                        \
+#define xlr_thr_id()                                            \
 ({int __id;                                                     \
  __asm__ __volatile__ (                                         \
            ".set push\n"                                        \
            ".set noreorder\n"                                   \
-           ".word 0x40088007\n"                                 \
-           "andi %0, $8, 0x03\n"                                \
+           "mfc0 $8, $15, 1\n"                                  \
+           "andi %0, $8, 0x3\n"                                 \
            ".set pop\n"                                         \
            : "=r" (__id) : : "$8");                             \
  __id;})
@@ -333,4 +329,26 @@
 	    :       "$8", "$9");
 }
 
+static __inline__ uint32_t
+xlr_paddr_lw(uint64_t paddr)
+{
+        uint32_t high, low, tmp;
+
+        high = 0x98000000 | (paddr >> 32);
+        low = paddr & 0xffffffff;
+
+        __asm__ __volatile__(
+                    ".set push         \n\t"
+                    ".set mips64       \n\t"
+                    "dsll32 %1, %1, 0  \n\t"
+                    "dsll32 %2, %2, 0  \n\t"  /* get rid of the */
+                    "dsrl32 %2, %2, 0  \n\t"  /* sign extend */
+                    "or     %1, %1, %2 \n\t"
+                    "lw     %0, 0(%1)  \n\t"
+                    ".set pop           \n"
+            :       "=r"(tmp)
+            :       "r"(high), "r"(low));
+
+	return tmp;
+}
 #endif
Index: sys/mips/rmi/on_chip.c
===================================================================
--- sys/mips/rmi/on_chip.c	(revision 206735)
+++ sys/mips/rmi/on_chip.c	(working copy)
@@ -38,8 +38,18 @@
 #include <sys/limits.h>
 #include <sys/bus.h>
 
+#include <sys/ktr.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/sched.h>
+#include <sys/unistd.h>
+#include <sys/sysctl.h>
+
 #include <machine/reg.h>
 #include <machine/cpu.h>
+#include <machine/hwfunc.h>
 #include <machine/mips_opcode.h>
 
 #include <machine/param.h>
@@ -62,6 +72,16 @@
 	void *dev_id;
 };
 
+struct msgring_ithread {
+	struct thread *i_thread;
+	u_int i_pending;
+	u_int i_flags;
+	int i_cpu;
+};
+
+struct msgring_ithread msgring_ithreads[MAXCPU];
+char ithd_name[MAXCPU][32];
+
 /* globals */
 static struct tx_stn_handler tx_stn_handlers[MAX_TX_STNS];
 
@@ -91,8 +111,6 @@
 
 uint32_t msgrng_msg_cycles = 0;
 
-int xlr_counters[MAXCPU][XLR_MAX_COUNTERS] __aligned(XLR_CACHELINE_SIZE);
-
 void xlr_msgring_handler(struct trapframe *);
 
 void 
@@ -103,11 +121,12 @@
 	int id;
 	unsigned long flags;
 
-	/* if not thread 0 */
-	if (xlr_thr_id() != 0)
-		return;
-	id = xlr_cpu_id();
+	KASSERT(xlr_thr_id() == 0,
+		("xlr_msgring_cpu_init from non-zero thread\n"));
 
+	id = xlr_core_id();
+	printf("msgrng init on core %d, thr %d\n",  id, xlr_thr_id());
+
 	bucket_sizes = xlr_board_info.bucket_sizes;
 	cc_config = xlr_board_info.credit_configs[id];
 
@@ -156,10 +175,6 @@
 
 	msgring_watermark_count = 1;
 	msgring_thread_mask = 0x01;
-/*   printf("[%s]: int_type = 0x%x, pop_num_buckets=%d, pop_bucket_mask=%x" */
-/*          "watermark_count=%d, thread_mask=%x\n", __FUNCTION__, */
-/*          msgring_int_type, msgring_pop_num_buckets, msgring_pop_bucket_mask, */
-/*          msgring_watermark_count, msgring_thread_mask); */
 }
 
 void 
@@ -172,7 +187,6 @@
 	unsigned int bucket_empty_bm = 0;
 	unsigned int status = 0;
 
-	xlr_inc_counter(MSGRNG_INT);
 	/* TODO: not necessary to disable preemption */
 	msgrng_flags_save(mflags);
 
@@ -185,8 +199,6 @@
 			break;
 
 		for (bucket = 0; bucket < msgring_pop_num_buckets; bucket++) {
-			uint32_t cycles = 0;
-
 			if ((bucket_empty_bm & (1 << bucket)) /* empty */ )
 				continue;
 
@@ -194,10 +206,6 @@
 			if (status)
 				continue;
 
-			xlr_inc_counter(MSGRNG_MSG);
-			msgrng_msg_cycles = mips_rd_count();
-			cycles = msgrng_msg_cycles;
-
 			tx_stid = xlr_board_info.msgmap[rx_stid];
 
 			if (!tx_stn_handlers[tx_stid].action) {
@@ -211,17 +219,12 @@
 				    &msg, tx_stn_handlers[tx_stid].dev_id);
 				msgrng_flags_save(mflags);
 			}
-			xlr_set_counter(MSGRNG_MSG_CYCLES, (read_c0_count() - cycles));
 		}
 	}
 
 	xlr_set_counter(MSGRNG_EXIT_STATUS, msgrng_read_status());
 
 	msgrng_flags_restore(mflags);
-
-	//dbg_msg("OUT irq=%d\n", irq);
-
-	/* Call the msg callback */
 }
 
 void 
@@ -249,9 +252,117 @@
 	msgrng_access_restore(&msgrng_lock, mflags);
 }
 
-extern void platform_prep_smp_launch(void);
-extern void msgring_process_fast_intr(void *arg);
+static int
+msgring_process_fast_intr(void *arg)
+{
+	int cpu = PCPU_GET(cpuid);
+	volatile struct msgring_ithread *it;
+	struct thread *td;
 
+	/* wakeup an appropriate intr_thread for processing this interrupt */
+	it = (volatile struct msgring_ithread *)&msgring_ithreads[cpu];
+	td = it->i_thread;
+
+	/*
+	 * Interrupt thread will enable the interrupts after processing all
+	 * messages
+	 */
+	disable_msgring_int(NULL);
+	atomic_store_rel_int(&it->i_pending, 1);
+	thread_lock(td);
+	if (TD_AWAITING_INTR(td)) {
+		TD_CLR_IWAIT(td);
+		sched_add(td, SRQ_INTR);
+	}
+	thread_unlock(td);
+	return FILTER_HANDLED;
+}
+
+static void
+msgring_process(void *arg)
+{
+	volatile struct msgring_ithread *ithd;
+	struct thread *td;
+	struct proc *p;
+
+	td = curthread;
+	p = td->td_proc;
+	ithd = (volatile struct msgring_ithread *)arg;
+	KASSERT(ithd->i_thread == td,
+	    ("%s:msg_ithread and proc linkage out of sync", __func__));
+
+	/* First bind this thread to the right CPU */
+	thread_lock(td);
+	sched_bind(td, ithd->i_cpu);
+	thread_unlock(td);
+
+	//printf("Started %s on CPU %d\n", __FUNCTION__, ithd->i_cpu);
+
+	while (1) {
+		while (ithd->i_pending) {
+			/*
+			 * This might need a full read and write barrier to
+			 * make sure that this write posts before any of the
+			 * memory or device accesses in the handlers.
+			 */
+			xlr_msgring_handler(NULL);
+			atomic_store_rel_int(&ithd->i_pending, 0);
+			enable_msgring_int(NULL);
+		}
+		if (!ithd->i_pending) {
+			thread_lock(td);
+			if (ithd->i_pending) {
+			  thread_unlock(td);
+			  continue;
+			}
+			sched_class(td, PRI_ITHD);
+			TD_SET_IWAIT(td);
+			mi_switch(SW_VOL, NULL);
+			thread_unlock(td);
+		}
+	}
+
+}
+
+static void 
+create_msgring_threads(void)
+{
+	int cpu;
+	uint32_t cpu_mask;
+	struct msgring_ithread *ithd;
+	struct thread *td;
+	struct proc *p;
+	int error;
+
+	cpu_mask = PCPU_GET(cpumask) | PCPU_GET(other_cpus);
+
+	/* Create kernel threads for message ring interrupt processing */
+	/* Currently create one task for thread 0 of each core */
+	for (cpu = 0; cpu < MAXCPU; cpu += 4) {
+		ithd = &msgring_ithreads[cpu];
+		sprintf(ithd_name[cpu], "msg_intr%d", cpu);
+		error = kproc_create(msgring_process,
+		    (void *)ithd,
+		    &p,
+		    (RFSTOPPED | RFHIGHPID),
+		    2,
+		    ithd_name[cpu]);
+
+		if (error)
+			panic("kproc_create() failed with %d", error);
+		td = FIRST_THREAD_IN_PROC(p);	/* XXXKSE */
+
+		thread_lock(td);
+		sched_class(td, PRI_ITHD);
+		TD_SET_IWAIT(td);
+		thread_unlock(td);
+		ithd->i_thread = td;
+		ithd->i_pending = 0;
+		ithd->i_cpu = cpu;
+		CTR2(KTR_INTR, "%s: created %s", __func__, ithd_name[cpu]);
+	}
+}
+
 int 
 register_msgring_handler(int major,
     void (*action) (int, int, int, int, struct msgrng_msg *, void *),
@@ -272,8 +383,7 @@
 	  mtx_unlock_spin(&msgrng_lock);
 
 	if (xlr_test_and_set(&msgring_int_enabled)) {
-		platform_prep_smp_launch();
-
+		create_msgring_threads();
 		cpu_establish_hardintr("msgring", (driver_filter_t *) msgring_process_fast_intr,
 			NULL, NULL, IRQ_MSGRING, 
 			INTR_TYPE_NET | INTR_FAST, &cookie);
@@ -303,7 +413,8 @@
 		 * Use local scheduling and high polarity for all IRTs
 		 * Invalidate all IRTs, by default
 		 */
-		xlr_write_reg(mmio, PIC_IRT_1_BASE + i, (level << 30) | (1 << 6) | (PIC_IRQ_BASE + i));
+		xlr_write_reg(mmio, PIC_IRT_1_BASE + i, (level << 30) | (1 << 6) |
+		    (PIC_IRQ_BASE + i));
 	}
 	dbg_msg("PIC init now done\n");
 }
@@ -311,8 +422,6 @@
 void 
 on_chip_init(void)
 {
-	int i = 0, j = 0;
-
 	/* Set xlr_io_base to the run time value */
 	mtx_init(&msgrng_lock, "msgring", NULL, MTX_SPIN | MTX_RECURSE);
 	mtx_init(&xlr_pic_lock, "pic", NULL, MTX_SPIN);
@@ -325,8 +434,4 @@
 	pic_init();
 
 	xlr_msgring_cpu_init();
-
-	for (i = 0; i < MAXCPU; i++)
-		for (j = 0; j < XLR_MAX_COUNTERS; j++)
-			atomic_set_int(&xlr_counters[i][j], 0);
 }
Index: sys/mips/rmi/xlr_machdep.c
===================================================================
--- sys/mips/rmi/xlr_machdep.c	(revision 206735)
+++ sys/mips/rmi/xlr_machdep.c	(working copy)
@@ -36,19 +36,11 @@
 #include <sys/rtprio.h>
 #include <sys/systm.h>
 #include <sys/interrupt.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-#include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
-#include <sys/proc.h>
 #include <sys/random.h>
-#include <sys/resourcevar.h>
-#include <sys/sched.h>
-#include <sys/sysctl.h>
-#include <sys/unistd.h>
 
 #include <sys/cons.h>		/* cinit() */
 #include <sys/kdb.h>
@@ -87,16 +79,11 @@
 #include <mips/rmi/perfmon.h>
 #endif
 
-
-
-void platform_prep_smp_launch(void);
-
 unsigned long xlr_io_base = (unsigned long)(DEFAULT_XLR_IO_BASE);
 
 /* 4KB static data aread to keep a copy of the bootload env until
    the dynamic kenv is setup */
 char boot1_env[4096];
-extern unsigned long _gp;
 int rmi_spin_mutex_safe=0;
 /*
  * Parameters from boot loader
@@ -108,16 +95,6 @@
 char **xlr_argv, **xlr_envp;
 uint64_t cpu_mask_info;
 uint32_t xlr_online_cpumask;
-
-#ifdef SMP
-static unsigned long xlr_secondary_gp[MAXCPU];
-static unsigned long xlr_secondary_sp[MAXCPU];
-
-#endif
-extern int mips_cpu_online_mask;
-extern int mips_cpu_logical_mask;
-uint32_t cpu_ltop_map[MAXCPU];
-uint32_t cpu_ptol_map[MAXCPU];
 uint32_t xlr_core_cpu_mask = 0x1;	/* Core 0 thread 0 is always there */
 
 void
@@ -130,27 +107,6 @@
 	mmio[8] = 0x1;
 }
 
-void 
-platform_secondary_init(void)
-{
-#ifdef SMP
-	xlr_msgring_cpu_init();
-
-	/* Setup interrupts for secondary CPUs here */
-	mips_mask_hard_irq(IPI_SMP_CALL_FUNCTION);
-	mips_mask_hard_irq(IPI_STOP);
-	mips_mask_hard_irq(IPI_RENDEZVOUS);
-	mips_mask_hard_irq(IPI_AST);
-	mips_mask_hard_irq(IRQ_TIMER);
-#ifdef XLR_PERFMON
-	mips_mask_hard_irq(IPI_PERFMON);
-#endif
-
-	return;
-#endif
-}
-
-
 int xlr_asid_pcpu = 256;	/* This the default */
 int xlr_shtlb_enabled = 0;
 
@@ -168,13 +124,13 @@
 	int mmu_setup;
 	int value = 0;
 	uint32_t cpu_map = xlr_boot1_info.cpu_online_map;
-	uint32_t thr_mask = cpu_map >> (xlr_cpu_id() << 2);
+	uint32_t thr_mask = cpu_map >> (xlr_core_id() << 2);
 	uint8_t core0 = xlr_boot1_info.cpu_online_map & 0xf;
 	uint8_t core_thr_mask;
 	int i = 0, count = 0;
 
 	/* If CPU0 did not enable shared TLB, other cores need to follow */
-	if ((xlr_cpu_id() != 0) && (xlr_shtlb_enabled == 0))
+	if ((xlr_core_id() != 0) && (xlr_shtlb_enabled == 0))
 		return;
 	/* First check if each core is brought up with the same mask */
 	for (i = 1; i < 8; i++) {
@@ -287,23 +243,7 @@
 }
 
 
-#ifdef SMP
-extern void xlr_secondary_start(unsigned long, unsigned long, unsigned long);
 static void 
-xlr_secondary_entry(void *data)
-{
-	unsigned long sp, gp;
-	unsigned int cpu = (xlr_cpu_id() << 2) + xlr_thr_id();
-
-	sp = xlr_secondary_sp[cpu];
-	gp = xlr_secondary_gp[cpu];
-
-	xlr_secondary_start((unsigned long)mips_secondary_wait, sp, gp);
-}
-
-#endif
-
-static void 
 xlr_set_boot_flags(void)
 {
 	char *p;
@@ -341,7 +281,6 @@
 }
 extern uint32_t _end;
 
-
 static void
 mips_init(void)
 {
@@ -353,9 +292,6 @@
 	mips_cpu_init();
 	pmap_bootstrap();
 #ifdef DDB
-#ifdef SMP
-	setup_nmi();
-#endif				/* SMP */
 	kdb_init();
 	if (boothowto & RB_KDB) {
 		kdb_enter("Boot flags requested debugger", NULL);
@@ -521,29 +457,17 @@
 	for (i = 1, j = 1; i < 32; i++) {
 		/* Allocate stack for all other cpus from fbsd kseg0 memory. */
 		if ((1U << i) & xlr_boot1_info.cpu_online_map) {
-			xlr_secondary_gp[i] =
-			    pmap_steal_memory(PAGE_SIZE);
-			if (!xlr_secondary_gp[i])
-				panic("Allocation failed for secondary cpu stacks");
-			xlr_secondary_sp[i] =
-			    xlr_secondary_gp[i] + PAGE_SIZE - CALLFRAME_SIZ;
-			xlr_secondary_gp[i] = (unsigned long)&_gp;
-			/* Build ltop and ptol cpu map. */
-			cpu_ltop_map[j] = i;
-			cpu_ptol_map[i] = j;
 			if ((i & 0x3) == 0)	/* store thread0 of each core */
 				xlr_core_cpu_mask |= (1 << j);
-			mips_cpu_logical_mask |= (1 << j);
 			j++;
 		}
 	}
 
-	mips_cpu_online_mask |= xlr_boot1_info.cpu_online_map;
 	wakeup = ((void (*) (void *, void *, unsigned int))
 	    (unsigned long)(xlr_boot1_info.wakeup));
 	printf("Waking up CPUs 0x%llx.\n", xlr_boot1_info.cpu_online_map & ~(0x1U));
 	if (xlr_boot1_info.cpu_online_map & ~(0x1U))
-		wakeup(xlr_secondary_entry, 0,
+		wakeup(mpwait, 0,
 		    (unsigned int)xlr_boot1_info.cpu_online_map);
 #endif
 
@@ -581,145 +505,3 @@
 platform_trap_exit(void)
 {
 }
-
-
-/*
- void
- platform_update_intrmask(int intr)
- {
-   write_c0_eimr64(read_c0_eimr64() | (1ULL<<intr));
- }
-*/
-
-void 
-disable_msgring_int(void *arg);
-void 
-enable_msgring_int(void *arg);
-void xlr_msgring_handler(struct trapframe *tf);
-int msgring_process_fast_intr(void *arg);
-
-struct msgring_ithread {
-	struct thread *i_thread;
-	u_int i_pending;
-	u_int i_flags;
-	int i_cpu;
-};
-struct msgring_ithread msgring_ithreads[MAXCPU];
-char ithd_name[MAXCPU][32];
-
-int
-msgring_process_fast_intr(void *arg)
-{
-	int cpu = PCPU_GET(cpuid);
-	volatile struct msgring_ithread *it;
-	struct thread *td;
-
-	/* wakeup an appropriate intr_thread for processing this interrupt */
-	it = (volatile struct msgring_ithread *)&msgring_ithreads[cpu];
-	td = it->i_thread;
-
-	/*
-	 * Interrupt thread will enable the interrupts after processing all
-	 * messages
-	 */
-	disable_msgring_int(NULL);
-	atomic_store_rel_int(&it->i_pending, 1);
-	thread_lock(td);
-	if (TD_AWAITING_INTR(td)) {
-		TD_CLR_IWAIT(td);
-		sched_add(td, SRQ_INTR);
-	}
-	thread_unlock(td);
-	return FILTER_HANDLED;
-}
-
-static void
-msgring_process(void *arg)
-{
-	volatile struct msgring_ithread *ithd;
-	struct thread *td;
-	struct proc *p;
-
-	td = curthread;
-	p = td->td_proc;
-	ithd = (volatile struct msgring_ithread *)arg;
-	KASSERT(ithd->i_thread == td,
-	    ("%s:msg_ithread and proc linkage out of sync", __func__));
-
-	/* First bind this thread to the right CPU */
-	thread_lock(td);
-	sched_bind(td, ithd->i_cpu);
-	thread_unlock(td);
-
-	//printf("Started %s on CPU %d\n", __FUNCTION__, ithd->i_cpu);
-
-	while (1) {
-		while (ithd->i_pending) {
-			/*
-			 * This might need a full read and write barrier to
-			 * make sure that this write posts before any of the
-			 * memory or device accesses in the handlers.
-			 */
-			xlr_msgring_handler(NULL);
-			atomic_store_rel_int(&ithd->i_pending, 0);
-			enable_msgring_int(NULL);
-		}
-		if (!ithd->i_pending) {
-			thread_lock(td);
-			if (ithd->i_pending) {
-			  thread_unlock(td);
-			  continue;
-			}
-			sched_class(td, PRI_ITHD);
-			TD_SET_IWAIT(td);
-			mi_switch(SW_VOL, NULL);
-			thread_unlock(td);
-		}
-	}
-
-}
-void 
-platform_prep_smp_launch(void)
-{
-	int cpu;
-	uint32_t cpu_mask;
-	struct msgring_ithread *ithd;
-	struct thread *td;
-	struct proc *p;
-	int error;
-
-	cpu_mask = PCPU_GET(cpumask) | PCPU_GET(other_cpus);
-
-	/* Create kernel threads for message ring interrupt processing */
-	/* Currently create one task for thread 0 of each core */
-	for (cpu = 0; cpu < MAXCPU; cpu += 1) {
-
-		if (!((1 << cpu) & cpu_mask))
-			continue;
-
-		if ((cpu_ltop_map[cpu] % 4) != 0)
-			continue;
-
-		ithd = &msgring_ithreads[cpu];
-		sprintf(ithd_name[cpu], "msg_intr%d", cpu);
-		error = kproc_create(msgring_process,
-		    (void *)ithd,
-		    &p,
-		    (RFSTOPPED | RFHIGHPID),
-		    2,
-		    ithd_name[cpu]);
-
-		if (error)
-			panic("kproc_create() failed with %d", error);
-		td = FIRST_THREAD_IN_PROC(p);	/* XXXKSE */
-
-		thread_lock(td);
-		sched_class(td, PRI_ITHD);
-		TD_SET_IWAIT(td);
-		thread_unlock(td);
-		ithd->i_thread = td;
-		ithd->i_pending = 0;
-		ithd->i_cpu = cpu;
-		CTR2(KTR_INTR, "%s: created %s", __func__, ithd_name[cpu]);
-	}
-}
Index: sys/mips/mips/machdep.c
===================================================================
--- sys/mips/mips/machdep.c	(revision 206735)
+++ sys/mips/mips/machdep.c	(working copy)
@@ -347,7 +347,7 @@
 	bcopy(MipsTLBMiss, (void *)TLB_MISS_EXC_VEC,
 	      MipsTLBMissEnd - MipsTLBMiss);
 
-#ifdef TARGET_OCTEON
+#if defined(TARGET_OCTEON) || defined(TARGET_XLR_XLS)
 /* Fake, but sufficient, for the 32-bit with 64-bit hardware addresses  */
 	bcopy(MipsTLBMiss, (void *)XTLB_MISS_EXC_VEC,
 	      MipsTLBMissEnd - MipsTLBMiss);

[-- Attachment #7 --]
diff -urN sys/mips/include/param.h sys/mips/include/param.h
--- sys/mips/include/param.h	2010-04-17 09:37:32.000941000 +0530
+++ sys/mips/include/param.h	2010-04-17 22:10:32.000013000 +0530
@@ -70,7 +70,7 @@
 #define	MID_MACHINE	0	/* None but has to be defined */
 
 #ifdef SMP
-#define	MAXSMPCPU	16
+#define	MAXSMPCPU	32
 #define	MAXCPU		MAXSMPCPU
 #else
 #define	MAXSMPCPU	1
diff -urN sys/mips/rmi/files.xlr sys/mips/rmi/files.xlr
--- sys/mips/rmi/files.xlr	2010-04-17 09:26:34.000204000 +0530
+++ sys/mips/rmi/files.xlr	2010-04-17 22:10:32.000030000 +0530
@@ -9,6 +9,7 @@
 mips/rmi/board.c				standard
 mips/rmi/on_chip.c				standard
 mips/rmi/intr_machdep.c			        standard
+mips/rmi/mpwait.S			        optional smp
 mips/rmi/xlr_i2c.c              		optional iic
 mips/rmi/uart_bus_xlr_iodi.c			optional uart 
 mips/rmi/uart_cpu_mips_xlr.c			optional uart 
diff -urN sys/mips/rmi/interrupt.h sys/mips/rmi/interrupt.h
--- sys/mips/rmi/interrupt.h	2010-04-17 09:26:34.000285000 +0530
+++ sys/mips/rmi/interrupt.h	2010-04-18 00:01:34.000026000 +0530
@@ -32,12 +32,9 @@
 
 /* Defines for the IRQ numbers */
 
-#define IRQ_DUMMY_UART           2
-#define IRQ_IPI_SMP_FUNCTION     3
-#define IRQ_IPI_SMP_RESCHEDULE   4
-#define IRQ_REMOTE_DEBUG         5
-#define IRQ_MSGRING              6
-#define IRQ_TIMER                7
+#define IRQ_IPI			41  /* 8-39 are mapped by PIC intr 0-31 */
+#define IRQ_MSGRING             6
+#define IRQ_TIMER               7
 
 /*
  * XLR needs custom pre and post handlers for PCI/PCI-e interrupts
diff -urN sys/mips/rmi/intr_machdep.c sys/mips/rmi/intr_machdep.c
--- sys/mips/rmi/intr_machdep.c	2010-04-17 09:26:34.000379000 +0530
+++ sys/mips/rmi/intr_machdep.c	2010-04-18 00:40:38.018052000 +0530
@@ -157,26 +157,7 @@
 	for (i = sizeof(eirr) * 8 - 1; i >= 0; i--) {
 		if ((eirr & (1ULL << i)) == 0)
 			continue;
-#ifdef SMP
-		/* These are reserved interrupts */
-		if ((i == IPI_AST) || (i == IPI_RENDEZVOUS) || (i == IPI_STOP)
-		    || (i == IPI_SMP_CALL_FUNCTION)) {
-			write_c0_eirr64(1ULL << i);
-			pic_ack(i, 0);
-			smp_handle_ipi(tf, i);
-			pic_delayed_ack(i, 0);
-			continue;
-		}
-#ifdef XLR_PERFMON
-		if (i == IPI_PERFMON) {
-			write_c0_eirr64(1ULL << i);
-			pic_ack(i, 0);
-			xlr_perfmon_sampler(NULL);
-			pic_delayed_ack(i, 0);
-			continue;
-		}
-#endif
-#endif
+
 		ie = mips_intr_events[i];
 		/* atomic_add_long(mih->cntp, 1); */
 
diff -urN sys/mips/rmi/mpwait.S sys/mips/rmi/mpwait.S
--- sys/mips/rmi/mpwait.S	1970-01-01 05:30:00.000000000 +0530
+++ sys/mips/rmi/mpwait.S	2010-04-17 22:10:32.000056000 +0530
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 2010 RMI Technologies Ltd.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asm.h>
+#include <machine/cpu.h>
+#include <machine/cpuregs.h>
+
+#include "assym.s"
+
+	.text
+	.set	noat
+	.set	noreorder
+
+/*
+ * On XLR the slave processors and threads will be executing boot
+ * loader code on startup. We need to make them run our code before
+ * blowing away boot loader memory.
+ */
+LEAF(mpwait)
+	PTR_LA  gp, _C_LABEL(_gp)
+	PTR_LA  t1, _C_LABEL(xlr_ap_release)
+	mfc0	t2, $15, 1
+	andi	t2, 0x1f
+	sll	t2, t2, 2
+	add	t1, t2
+
+1:	lw	t0, 0(t1)
+	bnez	t0, 2f
+	nop     /* We should not busy wait in core0 threads */
+	nop     /* on bootup, this will slow the cpu0 thread */
+	nop     /* down - TODO - wait with IPI based wakeup */
+	nop
+	nop
+	nop
+	nop
+	nop
+	j	1b
+	nop
+2:
+	PTR_LA  t1, _C_LABEL(mpentry)
+	jr	t1
+	nop
+END(mpwait)
diff -urN sys/mips/rmi/on_chip.c sys/mips/rmi/on_chip.c
--- sys/mips/rmi/on_chip.c	2010-04-17 22:04:42.000075000 +0530
+++ sys/mips/rmi/on_chip.c	2010-04-17 23:27:28.000108000 +0530
@@ -125,7 +125,6 @@
 		("xlr_msgring_cpu_init from non-zero thread\n"));
 
 	id = xlr_core_id();
-	printf("msgrng init on core %d, thr %d\n",  id, xlr_thr_id());
 
 	bucket_sizes = xlr_board_info.bucket_sizes;
 	cc_config = xlr_board_info.credit_configs[id];
diff -urN sys/mips/rmi/pic.h sys/mips/rmi/pic.h
--- sys/mips/rmi/pic.h	2010-04-17 09:26:34.000323000 +0530
+++ sys/mips/rmi/pic.h	2010-04-17 22:10:32.000073000 +0530
@@ -281,4 +281,16 @@
 	}
 }
 
+static inline
+void pic_send_ipi(int cpu, int ipi, int haslock)
+{
+        xlr_reg_t *mmio = xlr_io_mmio(XLR_IO_PIC_OFFSET);
+        int tid, pid;
+
+        tid = cpu & 0x3;
+        pid = (cpu >> 2) & 0x7;
+
+	xlr_write_reg(mmio, PIC_IPI,  (pid << 20) | (tid << 16) | ipi);
+}
+
 #endif				/* _RMI_PIC_H_ */
diff -urN sys/mips/rmi/xlr_machdep.c sys/mips/rmi/xlr_machdep.c
--- sys/mips/rmi/xlr_machdep.c	2010-04-17 22:04:42.000095000 +0530
+++ sys/mips/rmi/xlr_machdep.c	2010-04-17 22:16:57.000021000 +0530
@@ -79,6 +79,9 @@
 #include <mips/rmi/perfmon.h>
 #endif
 
+void mpwait(void);
+void enable_msgring_int(void *arg);
+
 unsigned long xlr_io_base = (unsigned long)(DEFAULT_XLR_IO_BASE);
 
 /* 4KB static data aread to keep a copy of the bootload env until
@@ -110,6 +113,7 @@
 int xlr_asid_pcpu = 256;	/* This the default */
 int xlr_shtlb_enabled = 0;
 
+#ifndef SMP
 /* This function sets up the number of tlb entries available
    to the kernel based on the number of threads brought up.
    The ASID range also gets divided similarly.
@@ -173,6 +177,7 @@
 	write_32bit_phnx_ctrl_reg(4, 0, mmu_setup);
 
 }
+#endif
 
 /*
  * Platform specific register setup for CPUs
@@ -187,8 +192,13 @@
 	char *hw_env;
 	char *start, *end;
 	uint32_t reg, val;
+#ifndef SMP
 	int thr_id = xlr_thr_id();
 
+/*
+ * XXX: SMP now need different wired mappings for threads 
+ * we cannot share TLBs.
+ */
 	if (thr_id == 0) {
 		if ((hw_env = getenv("xlr.shtlb")) != NULL) {
 			start = hw_env;
@@ -200,6 +210,7 @@
 			setup_tlb_resource();
 		}
 	}
+#endif
 	if ((hw_env = getenv("xlr.cr")) == NULL)
 		return;
 
@@ -505,3 +516,61 @@
 platform_trap_exit(void)
 {
 }
+
+#ifdef SMP
+int xlr_ap_release[MAXCPU];
+
+int platform_start_ap(int cpuid)
+{
+	/*
+	 * other cpus are enabled by the boot loader and they will be 
+	 * already looping in mpwait, release them
+	 */
+	atomic_store_rel_int(&xlr_ap_release[cpuid], 1);
+	return 0;
+}
+
+void platform_init_ap(int processor_id)
+{
+	uint32_t stat;
+
+	/* Setup interrupts for secondary CPUs here */
+	stat = mips_rd_status();
+	stat |= MIPS_SR_COP_2_BIT | MIPS_SR_COP_0_BIT;
+	mips_wr_status(stat);
+
+	xlr_unmask_hard_irq((void *)platform_ipi_intrnum());
+	xlr_unmask_hard_irq((void *)IRQ_TIMER);
+	if (xlr_thr_id() == 0) {
+		xlr_msgring_cpu_init(); 
+        	enable_msgring_int(NULL);
+		xlr_unmask_hard_irq((void *)IRQ_MSGRING);
+	}
+		
+	return;
+}
+
+int platform_ipi_intrnum(void) 
+{
+	return IRQ_IPI;
+}
+
+void platform_ipi_send(int cpuid)
+{
+	pic_send_ipi(cpuid, platform_ipi_intrnum(), 0);
+}
+
+void platform_ipi_clear(void)
+{
+}
+
+int platform_processor_id(void)
+{
+	return xlr_cpu_id();
+}
+
+int platform_num_processors(void)
+{
+	return fls(xlr_boot1_info.cpu_online_map);
+}
+#endif

[-- Attachment #8 --]
Index: sys/kern/subr_smp.c
===================================================================
--- sys/kern/subr_smp.c	(revision 205444)
+++ sys/kern/subr_smp.c	(working copy)
@@ -502,7 +502,10 @@
 	top = &group[0];
 	top->cg_parent = NULL;
 	top->cg_child = NULL;
-	top->cg_mask = (1 << mp_ncpus) - 1;
+	if (mp_ncpus == sizeof(top->cg_mask) * 8)
+		top->cg_mask = -1;
+	else
+		top->cg_mask = (1 << mp_ncpus) - 1;
 	top->cg_count = mp_ncpus;
 	top->cg_children = 0;
 	top->cg_level = CG_SHARE_NONE;
