Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 8 Jun 2019 18:26:49 +0000 (UTC)
From:      "Jonathan T. Looney" <jtl@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r348810 - head/sys/x86/x86
Message-ID:  <201906081826.x58IQnQe067743@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jtl
Date: Sat Jun  8 18:26:48 2019
New Revision: 348810
URL: https://svnweb.freebsd.org/changeset/base/348810

Log:
  Currently, MCA entries remain on an every-growing linked list. This means
  that it becomes increasingly expensive to process a steady stream of
  correctable errors. Additionally, the memory used by the MCA entries can
  grow without bound.
  
  Change the code to maintain two separate lists: a list of entries which
  still need to be logged, and a list of entries which have already been
  logged. Additionally, allow a user-configurable limit on the number of
  entries which will be saved after they are logged. (The limit defaults
  to -1 [unlimited], which is the current behavior.)
  
  Reviewed by:	imp, jhb
  MFC after:	2 weeks
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D20482

Modified:
  head/sys/x86/x86/mca.c

Modified: head/sys/x86/x86/mca.c
==============================================================================
--- head/sys/x86/x86/mca.c	Sat Jun  8 17:49:17 2019	(r348809)
+++ head/sys/x86/x86/mca.c	Sat Jun  8 18:26:48 2019	(r348810)
@@ -86,7 +86,6 @@ struct amd_et_state {
 
 struct mca_internal {
 	struct mca_record rec;
-	int		logged;
 	STAILQ_ENTRY(mca_internal) link;
 };
 
@@ -101,6 +100,7 @@ static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Arch
 
 static volatile int mca_count;	/* Number of records stored. */
 static int mca_banks;		/* Number of per-CPU register banks. */
+static int mca_maxcount = -1;	/* Limit on records stored. (-1 = unlimited) */
 
 static SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL,
     "Machine Check Architecture");
@@ -125,10 +125,11 @@ SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RDTU
 static STAILQ_HEAD(, mca_internal) mca_freelist;
 static int mca_freecount;
 static STAILQ_HEAD(, mca_internal) mca_records;
+static STAILQ_HEAD(, mca_internal) mca_pending;
 static struct callout mca_timer;
 static int mca_ticks = 3600;	/* Check hourly by default. */
 static struct taskqueue *mca_tq;
-static struct task mca_refill_task, mca_scan_task;
+static struct task mca_resize_task, mca_scan_task;
 static struct mtx mca_lock;
 
 static unsigned int
@@ -557,32 +558,49 @@ mca_check_status(int bank, struct mca_record *rec)
 }
 
 static void
-mca_fill_freelist(void)
+mca_resize_freelist(void)
 {
-	struct mca_internal *rec;
-	int desired;
+	struct mca_internal *next, *rec;
+	STAILQ_HEAD(, mca_internal) tmplist;
+	int count, i, desired_max, desired_min;
 
 	/*
 	 * Ensure we have at least one record for each bank and one
-	 * record per CPU.
+	 * record per CPU, but no more than twice that amount.
 	 */
-	desired = imax(mp_ncpus, mca_banks);
+	desired_min = imax(mp_ncpus, mca_banks);
+	desired_max = imax(mp_ncpus, mca_banks) * 2;
+	STAILQ_INIT(&tmplist);
 	mtx_lock_spin(&mca_lock);
-	while (mca_freecount < desired) {
+	while (mca_freecount > desired_max) {
+		rec = STAILQ_FIRST(&mca_freelist);
+		KASSERT(rec != NULL, ("mca_freecount is %d, but list is empty",
+		    mca_freecount));
+		STAILQ_REMOVE_HEAD(&mca_freelist, link);
+		mca_freecount--;
+		STAILQ_INSERT_TAIL(&tmplist, rec, link);
+	}
+	while (mca_freecount < desired_min) {
+		count = desired_min - mca_freecount;
 		mtx_unlock_spin(&mca_lock);
-		rec = malloc(sizeof(*rec), M_MCA, M_WAITOK);
+		for (i = 0; i < count; i++) {
+			rec = malloc(sizeof(*rec), M_MCA, M_WAITOK);
+			STAILQ_INSERT_TAIL(&tmplist, rec, link);
+		}
 		mtx_lock_spin(&mca_lock);
-		STAILQ_INSERT_TAIL(&mca_freelist, rec, link);
-		mca_freecount++;
+		STAILQ_CONCAT(&mca_freelist, &tmplist);
+		mca_freecount += count;
 	}
 	mtx_unlock_spin(&mca_lock);
+	STAILQ_FOREACH_SAFE(rec, &tmplist, link, next)
+		free(rec, M_MCA);
 }
 
 static void
-mca_refill(void *context, int pending)
+mca_resize(void *context, int pending)
 {
 
-	mca_fill_freelist();
+	mca_resize_freelist();
 }
 
 static void
@@ -607,12 +625,8 @@ mca_record_entry(enum scan_mode mode, const struct mca
 	}
 
 	rec->rec = *record;
-	rec->logged = 0;
-	STAILQ_INSERT_TAIL(&mca_records, rec, link);
-	mca_count++;
+	STAILQ_INSERT_TAIL(&mca_pending, rec, link);
 	mtx_unlock_spin(&mca_lock);
-	if (mode == CMCI && !cold)
-		taskqueue_enqueue(mca_tq, &mca_refill_task);
 }
 
 #ifdef DEV_APIC
@@ -788,14 +802,69 @@ mca_scan(enum scan_mode mode, int *recoverablep)
 		}
 #endif
 	}
-	if (mode == POLLED)
-		mca_fill_freelist();
 	if (recoverablep != NULL)
 		*recoverablep = recoverable;
 	return (count);
 }
 
 /*
+ * Store a new record on the mca_records list while enforcing
+ * mca_maxcount.
+ */
+static void
+mca_store_record(struct mca_internal *mca)
+{
+
+	/*
+	 * If we are storing no records (mca_maxcount == 0),
+	 * we just free this record.
+	 *
+	 * If we are storing records (mca_maxcount != 0) and
+	 * we have free space on the list, store the record
+	 * and increment mca_count.
+	 *
+	 * If we are storing records and we do not have free
+	 * space on the list, store the new record at the
+	 * tail and free the oldest one from the head.
+	 */
+	if (mca_maxcount != 0)
+		STAILQ_INSERT_TAIL(&mca_records, mca, link);
+	if (mca_maxcount < 0 || mca_count < mca_maxcount)
+		mca_count++;
+	else {
+		if (mca_maxcount != 0) {
+			mca = STAILQ_FIRST(&mca_records);
+			STAILQ_REMOVE_HEAD(&mca_records, link);
+		}
+		STAILQ_INSERT_TAIL(&mca_freelist, mca, link);
+		mca_freecount++;
+	}
+}
+
+/*
+ * Do the work to process machine check records which have just been
+ * gathered. Print any pending logs to the console. Queue them for storage.
+ * Trigger a resizing of the free list.
+ */
+static void
+mca_process_records(enum scan_mode mode)
+{
+	struct mca_internal *mca;
+
+	mtx_lock_spin(&mca_lock);
+	while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) {
+		STAILQ_REMOVE_HEAD(&mca_pending, link);
+		mca_log(&mca->rec);
+		mca_store_record(mca);
+	}
+	mtx_unlock_spin(&mca_lock);
+	if (mode == POLLED)
+		mca_resize_freelist();
+	else if (!cold)
+		taskqueue_enqueue(mca_tq, &mca_resize_task);
+}
+
+/*
  * Scan the machine check banks on all CPUs by binding to each CPU in
  * turn.  If any of the CPUs contained new machine check records, log
  * them to the console.
@@ -803,11 +872,10 @@ mca_scan(enum scan_mode mode, int *recoverablep)
 static void
 mca_scan_cpus(void *context, int pending)
 {
-	struct mca_internal *mca;
 	struct thread *td;
 	int count, cpu;
 
-	mca_fill_freelist();
+	mca_resize_freelist();
 	td = curthread;
 	count = 0;
 	thread_lock(td);
@@ -819,16 +887,8 @@ mca_scan_cpus(void *context, int pending)
 		sched_unbind(td);
 	}
 	thread_unlock(td);
-	if (count != 0) {
-		mtx_lock_spin(&mca_lock);
-		STAILQ_FOREACH(mca, &mca_records, link) {
-			if (!mca->logged) {
-				mca->logged = 1;
-				mca_log(&mca->rec);
-			}
-		}
-		mtx_unlock_spin(&mca_lock);
-	}
+	if (count != 0)
+		mca_process_records(POLLED);
 }
 
 static void
@@ -853,6 +913,35 @@ sysctl_mca_scan(SYSCTL_HANDLER_ARGS)
 	return (0);
 }
 
+static int
+sysctl_mca_maxcount(SYSCTL_HANDLER_ARGS)
+{
+	struct mca_internal *mca;
+	int error, i;
+	bool doresize;
+
+	i = mca_maxcount;
+	error = sysctl_handle_int(oidp, &i, 0, req);
+	if (error || req->newptr == NULL)
+		return (error);
+	mtx_lock_spin(&mca_lock);
+	mca_maxcount = i;
+	doresize = false;
+	if (mca_maxcount >= 0)
+		while (mca_count > mca_maxcount) {
+			mca = STAILQ_FIRST(&mca_records);
+			STAILQ_REMOVE_HEAD(&mca_records, link);
+			mca_count--;
+			STAILQ_INSERT_TAIL(&mca_freelist, mca, link);
+			mca_freecount++;
+			doresize = true;
+		}
+	mtx_unlock_spin(&mca_lock);
+	if (doresize && !cold)
+		taskqueue_enqueue(mca_tq, &mca_resize_task);
+	return (error);
+}
+
 static void
 mca_createtq(void *dummy)
 {
@@ -864,7 +953,7 @@ mca_createtq(void *dummy)
 	taskqueue_start_threads(&mca_tq, 1, PI_SWI(SWI_TQ), "mca taskq");
 
 	/* CMCIs during boot may have claimed items from the freelist. */
-	mca_fill_freelist();
+	mca_resize_freelist();
 }
 SYSINIT(mca_createtq, SI_SUB_CONFIGURE, SI_ORDER_ANY, mca_createtq, NULL);
 
@@ -933,15 +1022,20 @@ mca_setup(uint64_t mcg_cap)
 	mca_banks = mcg_cap & MCG_CAP_COUNT;
 	mtx_init(&mca_lock, "mca", NULL, MTX_SPIN);
 	STAILQ_INIT(&mca_records);
+	STAILQ_INIT(&mca_pending);
 	TASK_INIT(&mca_scan_task, 0, mca_scan_cpus, NULL);
 	callout_init(&mca_timer, 1);
 	STAILQ_INIT(&mca_freelist);
-	TASK_INIT(&mca_refill_task, 0, mca_refill, NULL);
-	mca_fill_freelist();
+	TASK_INIT(&mca_resize_task, 0, mca_resize, NULL);
+	mca_resize_freelist();
 	SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "count", CTLFLAG_RD, (int *)(uintptr_t)&mca_count, 0,
 	    "Record count");
 	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
+	    "maxcount", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE,
+	    &mca_maxcount, 0, sysctl_mca_maxcount, "I",
+	    "Maximum record count (-1 is unlimited)");
+	SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO,
 	    "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks,
 	    0, sysctl_positive_int, "I",
 	    "Periodic interval in seconds to scan for machine checks");
@@ -1322,25 +1416,14 @@ mca_intr(void)
 void
 cmc_intr(void)
 {
-	struct mca_internal *mca;
-	int count;
 
 	/*
 	 * Serialize MCA bank scanning to prevent collisions from
 	 * sibling threads.
+	 *
+	 * If we found anything, log them to the console.
 	 */
-	count = mca_scan(CMCI, NULL);
-
-	/* If we found anything, log them to the console. */
-	if (count != 0) {
-		mtx_lock_spin(&mca_lock);
-		STAILQ_FOREACH(mca, &mca_records, link) {
-			if (!mca->logged) {
-				mca->logged = 1;
-				mca_log(&mca->rec);
-			}
-		}
-		mtx_unlock_spin(&mca_lock);
-	}
+	if (mca_scan(CMCI, NULL) != 0)
+		mca_process_records(CMCI);
 }
 #endif



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201906081826.x58IQnQe067743>