Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 30 May 2019 13:45:42 +0000 (UTC)
From:      Marcin Wojtas <mw@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r348413 - head/sys/dev/ena
Message-ID:  <201905301345.x4UDjgS1077393@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mw
Date: Thu May 30 13:45:41 2019
New Revision: 348413
URL: https://svnweb.freebsd.org/changeset/base/348413

Log:
  Improve ENA reset handling
  
  To make debugging easier, the reset is now triggered and the reset reason
  is set only if a reset has not already been triggered. This approach
  ensures that the first reset reason is not overwritten by a later one.
  
  Also, add a reset trigger upon an invalid Tx request ID.
  
  Submitted by:  Michal Krawczyk <mk@semihalf.com>
  Obtained from: Semihalf
  Sponsored by:  Amazon, Inc.

Modified:
  head/sys/dev/ena/ena.c

Modified: head/sys/dev/ena/ena.c
==============================================================================
--- head/sys/dev/ena/ena.c	Thu May 30 13:42:52 2019	(r348412)
+++ head/sys/dev/ena/ena.c	Thu May 30 13:45:41 2019	(r348413)
@@ -777,8 +777,10 @@ validate_rx_req_id(struct ena_ring *rx_ring, uint16_t 
 	counter_u64_add(rx_ring->rx_stats.bad_req_id, 1);
 
 	/* Trigger device reset */
-	rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
-	ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter);
+	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter))) {
+		rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID;
+		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, rx_ring->adapter);
+	}
 
 	return (EFAULT);
 }
@@ -1242,6 +1244,10 @@ validate_tx_req_id(struct ena_ring *tx_ring, uint16_t 
 	device_printf(adapter->pdev, "Invalid req_id: %hu\n", req_id);
 	counter_u64_add(tx_ring->tx_stats.bad_req_id, 1);
 
+	/* Trigger device reset */
+	adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
+	ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+
 	return (EFAULT);
 }
 
@@ -1799,8 +1805,10 @@ error:
 	counter_u64_add(rx_ring->rx_stats.bad_desc_num, 1);
 
 	/* Too many desc from the device. Trigger reset */
-	adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
-	ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+	if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
+		adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS;
+		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+	}
 
 	return (0);
 }
@@ -3806,8 +3814,10 @@ static void check_for_missing_keep_alive(struct ena_ad
 		device_printf(adapter->pdev,
 		    "Keep alive watchdog timeout.\n");
 		counter_u64_add(adapter->dev_stats.wd_expired, 1);
-		adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
-		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
+			adapter->reset_reason = ENA_REGS_RESET_KEEP_ALIVE_TO;
+			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		}
 	}
 }
 
@@ -3819,8 +3829,10 @@ static void check_for_admin_com_state(struct ena_adapt
 		device_printf(adapter->pdev,
 		    "ENA admin queue is not in running state!\n");
 		counter_u64_add(adapter->dev_stats.admin_q_pause, 1);
-		adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
-		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
+			adapter->reset_reason = ENA_REGS_RESET_ADMIN_TO;
+			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		}
 	}
 }
 
@@ -3838,9 +3850,11 @@ check_for_rx_interrupt_queue(struct ena_adapter *adapt
 
 	if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
 		device_printf(adapter->pdev, "Potential MSIX issue on Rx side "
-		   "Queue = %d. Reset the device\n", rx_ring->qid);
-		adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
-		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		    "Queue = %d. Reset the device\n", rx_ring->qid);
+		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
+			adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
+			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		}
 		return (EIO);
 	}
 
@@ -3878,8 +3892,13 @@ check_missing_comp_in_tx_queue(struct ena_adapter *ada
 			device_printf(adapter->pdev,
 			    "Potential MSIX issue on Tx side Queue = %d. "
 			    "Reset the device\n", tx_ring->qid);
-			adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
-			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+			if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET,
+			    adapter))) {
+				adapter->reset_reason =
+				    ENA_REGS_RESET_MISS_INTERRUPT;
+				ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET,
+				    adapter);
+			}
 			return (EIO);
 		}
 
@@ -3901,8 +3920,10 @@ check_missing_comp_in_tx_queue(struct ena_adapter *ada
 		    "The number of lost tx completion is above the threshold "
 		    "(%d > %d). Reset the device\n",
 		    missed_tx, adapter->missing_tx_threshold);
-		adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
-		ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		if (likely(!ENA_FLAG_ISSET(ENA_FLAG_TRIGGER_RESET, adapter))) {
+			adapter->reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
+			ENA_FLAG_SET_ATOMIC(ENA_FLAG_TRIGGER_RESET, adapter);
+		}
 		rc = EIO;
 	}
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201905301345.x4UDjgS1077393>