Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 6 Feb 2015 18:20:00 +0000 (UTC)
From:      John Baldwin <jhb@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r278325 - in head/sys: amd64/amd64 i386/i386 x86/x86
Message-ID:  <201502061820.t16IK0w7052945@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jhb
Date: Fri Feb  6 18:19:59 2015
New Revision: 278325
URL: https://svnweb.freebsd.org/changeset/base/278325

Log:
  Revert the IPI startup sequence to match what is described in the
  Intel Multiprocessor Specification v1.4.  The Intel SDM claims that
  the INIT IPIs here are invalid, but other systems follow the MP
  spec instead.
  
  While here, fix the IPI wait routine to accept a timeout in microseconds
  instead of a raw spin count, and don't spin forever during AP startup.
  Instead, panic if a STARTUP IPI is not delivered after 20 us.
  
  PR:		196542
  Differential Revision:	https://reviews.freebsd.org/D1719
  MFC after:	2 weeks

Modified:
  head/sys/amd64/amd64/mp_machdep.c
  head/sys/i386/i386/mp_machdep.c
  head/sys/x86/x86/local_apic.c

Modified: head/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- head/sys/amd64/amd64/mp_machdep.c	Fri Feb  6 18:13:29 2015	(r278324)
+++ head/sys/amd64/amd64/mp_machdep.c	Fri Feb  6 18:19:59 2015	(r278325)
@@ -1065,14 +1065,27 @@ ipi_startup(int apic_id, int vector)
 {
 
 	/*
+	 * This attempts to follow the algorithm described in the
+	 * Intel Multiprocessor Specification v1.4 in section B.4.
+	 * For each IPI, we allow the local APIC ~20us to deliver the
+	 * IPI.  If that times out, we panic.
+	 */
+
+	/*
 	 * first we do an INIT IPI: this INIT IPI might be run, resetting
 	 * and running the target CPU. OR this INIT IPI might be latched (P5
 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 	 * ignored.
 	 */
-	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
 	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
-	lapic_ipi_wait(-1);
+	lapic_ipi_wait(20);
+
+	/* Explicitly deassert the INIT IPI. */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
+	    apic_id);
+
 	DELAY(10000);		/* wait ~10mS */
 
 	/*
@@ -1084,9 +1097,11 @@ ipi_startup(int apic_id, int vector)
 	 * will run.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
-	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
-	lapic_ipi_wait(-1);
+	if (!lapic_ipi_wait(20))
+		panic("Failed to deliver first STARTUP IPI to APIC %d",
+		    apic_id);
 	DELAY(200);		/* wait ~200uS */
 
 	/*
@@ -1096,9 +1111,12 @@ ipi_startup(int apic_id, int vector)
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
-	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
-	lapic_ipi_wait(-1);
+	if (!lapic_ipi_wait(20))
+		panic("Failed to deliver second STARTUP IPI to APIC %d",
+		    apic_id);
+
 	DELAY(200);		/* wait ~200uS */
 }
 

Modified: head/sys/i386/i386/mp_machdep.c
==============================================================================
--- head/sys/i386/i386/mp_machdep.c	Fri Feb  6 18:13:29 2015	(r278324)
+++ head/sys/i386/i386/mp_machdep.c	Fri Feb  6 18:19:59 2015	(r278325)
@@ -1138,14 +1138,27 @@ ipi_startup(int apic_id, int vector)
 {
 
 	/*
+	 * This attempts to follow the algorithm described in the
+	 * Intel Multiprocessor Specification v1.4 in section B.4.
+	 * For each IPI, we allow the local APIC ~20us to deliver the
+	 * IPI.  If that times out, we panic.
+	 */
+
+	/*
 	 * first we do an INIT IPI: this INIT IPI might be run, resetting
 	 * and running the target CPU. OR this INIT IPI might be latched (P5
 	 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be
 	 * ignored.
 	 */
-	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
 	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
-	lapic_ipi_wait(-1);
+	lapic_ipi_wait(20);
+
+	/* Explicitly deassert the INIT IPI. */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT,
+	    apic_id);
+
 	DELAY(10000);		/* wait ~10mS */
 
 	/*
@@ -1157,9 +1170,11 @@ ipi_startup(int apic_id, int vector)
 	 * will run.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
-	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
-	lapic_ipi_wait(-1);
+	if (!lapic_ipi_wait(20))
+		panic("Failed to deliver first STARTUP IPI to APIC %d",
+		    apic_id);
 	DELAY(200);		/* wait ~200uS */
 
 	/*
@@ -1169,9 +1184,12 @@ ipi_startup(int apic_id, int vector)
 	 * recognized after hardware RESET or INIT IPI.
 	 */
 	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
-	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
 	    vector, apic_id);
-	lapic_ipi_wait(-1);
+	if (!lapic_ipi_wait(20))
+		panic("Failed to deliver second STARTUP IPI to APIC %d",
+		    apic_id);
+
 	DELAY(200);		/* wait ~200uS */
 }
 

Modified: head/sys/x86/x86/local_apic.c
==============================================================================
--- head/sys/x86/x86/local_apic.c	Fri Feb  6 18:13:29 2015	(r278324)
+++ head/sys/x86/x86/local_apic.c	Fri Feb  6 18:19:59 2015	(r278325)
@@ -1452,22 +1452,22 @@ SYSINIT(apic_setup_io, SI_SUB_INTR, SI_O
 static int
 native_lapic_ipi_wait(int delay)
 {
-	int x, incr;
+	int x;
 
 	/*
-	 * Wait delay loops for IPI to be sent.  This is highly bogus
-	 * since this is sensitive to CPU clock speed.  If delay is
+	 * Wait delay microseconds for IPI to be sent.  If delay is
 	 * -1, we wait forever.
 	 */
 	if (delay == -1) {
-		incr = 0;
-		delay = 1;
-	} else
-		incr = 1;
-	for (x = 0; x < delay; x += incr) {
+		while ((lapic->icr_lo & APIC_DELSTAT_MASK) != APIC_DELSTAT_IDLE)
+			ia32_pause();
+		return (1);
+	}
+
+	for (x = 0; x < delay; x += 5) {
 		if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE)
 			return (1);
-		ia32_pause();
+		DELAY(5);
 	}
 	return (0);
 }
@@ -1501,9 +1501,9 @@ native_lapic_ipi_raw(register_t icrlo, u
 	intr_restore(saveintr);
 }
 
-#define	BEFORE_SPIN	1000000
+#define	BEFORE_SPIN	50000
 #ifdef DETECT_DEADLOCK
-#define	AFTER_SPIN	1000
+#define	AFTER_SPIN	50
 #endif
 
 static void
@@ -1514,7 +1514,7 @@ native_lapic_ipi_vectored(u_int vector, 
 	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
 	    ("%s: invalid vector %d", __func__, vector));
 
-	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
+	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT;
 
 	/*
 	 * IPI_STOP_HARD is just a "fake" vector used to send a NMI.
@@ -1522,9 +1522,9 @@ native_lapic_ipi_vectored(u_int vector, 
 	 * the vector.
 	 */
 	if (vector == IPI_STOP_HARD)
-		icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT;
+		icrlo |= APIC_DELMODE_NMI;
 	else
-		icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT;
+		icrlo |= vector | APIC_DELMODE_FIXED;
 	destfield = 0;
 	switch (dest) {
 	case APIC_IPI_DEST_SELF:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201502061820.t16IK0w7052945>