Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 16 Apr 2015 06:23:59 +0000 (UTC)
From:      Sean Bruno <sbruno@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r281590 - projects/em_mq/sys/dev/e1000
Message-ID:  <201504160623.t3G6NxEf062304@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: sbruno
Date: Thu Apr 16 06:23:58 2015
New Revision: 281590
URL: https://svnweb.freebsd.org/changeset/base/281590

Log:
  Get 82574 tx queues working reliably via a few bits of stolen errata and
  a few bits of stolen igb(4) code.
  
  - Bind interrupts to specific CPUs (stolen from igb).
  - Don't ever let em_mq_start() have a negative value for its tx queue.
  - Allow the interrupt for the link status to be shareable.
  - Setup 2nd queue TXDCTL bits to enable it fully, without this, it times out.
    Values derived from linux e1000e driver, netdev.c.  Committed due to apparent
    erratum.
  - When configuring RSS, use a modulo of the num_rx_queues, instead of magic number 2
  - Add premptive comment when configuring RXDCTL indicating why the 2nd queue is not
    being configured here.
  - Revert enhanced queue status handling, from svn revision 281045 as it fails to handle
    watchdog events at all in this driver.

Modified:
  projects/em_mq/sys/dev/e1000/if_em.c
  projects/em_mq/sys/dev/e1000/if_em.h

Modified: projects/em_mq/sys/dev/e1000/if_em.c
==============================================================================
--- projects/em_mq/sys/dev/e1000/if_em.c	Thu Apr 16 06:12:25 2015	(r281589)
+++ projects/em_mq/sys/dev/e1000/if_em.c	Thu Apr 16 06:23:58 2015	(r281590)
@@ -53,6 +53,7 @@
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/rman.h>
+#include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
@@ -401,6 +402,13 @@ SYSCTL_INT(_hw_em, OID_AUTO, num_tx_queu
 static int em_num_rx_queues = 1;
 SYSCTL_INT(_hw_em, OID_AUTO, num_rx_queues, CTLFLAG_RDTUN, &em_num_rx_queues, 0,
     "82574 only: Number of rx queues to configure, 0 indicates autoconfigure");
+
+/*
+** Global variable to store last used CPU when binding queues
+** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
+** queue is bound to a cpu.
+*/
+static int em_last_bind_cpu = -1;
 #endif
 
 /* How many packets rxeof tries to clean at a time */
@@ -899,8 +907,7 @@ em_resume(device_t dev)
 		for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
 			EM_TX_LOCK(txr);
 #ifdef EM_MULTIQUEUE
-			if (((txr->queue_status & EM_QUEUE_DEPLETED) == 0) &&
-			    !drbr_empty(ifp, txr->br))
+			if (!drbr_empty(ifp, txr->br))
 				em_mq_start_locked(ifp, txr);
 #else
 			if (!if_sendq_empty(ifp))
@@ -958,7 +965,7 @@ em_start_locked(if_t ifp, struct tx_ring
 
 		/* Set timeout in case hardware has problems transmitting. */
 		txr->watchdog_time = ticks;
-                txr->queue_status |= EM_QUEUE_WORKING;
+                txr->queue_status = EM_QUEUE_WORKING;
 	}
 
 	return;
@@ -994,7 +1001,7 @@ em_mq_start(if_t ifp, struct mbuf *m)
 {
 	struct adapter	*adapter = if_getsoftc(ifp);
 	struct tx_ring	*txr = adapter->tx_rings;
-	int 		i, error;
+	unsigned int	i, error;
 
 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
 		i = m->m_pkthdr.flowid % adapter->num_tx_queues;
@@ -1058,14 +1065,15 @@ em_mq_start_locked(if_t ifp, struct tx_r
 
 	if (enq > 0) {
                 /* Set the watchdog */
-                txr->queue_status |= EM_QUEUE_WORKING;
+                txr->queue_status = EM_QUEUE_WORKING;
 		txr->watchdog_time = ticks;
 	}
 
-	if (txr->tx_avail < (adapter->num_tx_desc / 8))
-		em_txeof(txr);
 	if (txr->tx_avail < EM_MAX_SCATTER)
-		txr->queue_status |= EM_QUEUE_DEPLETED;
+		em_txeof(txr);
+	if (txr->tx_avail < EM_MAX_SCATTER) {
+		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0);
+	}
 	return (err);
 }
 
@@ -1549,9 +1557,9 @@ em_handle_que(void *context, int pending
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct rx_ring	*rxr = adapter->rx_rings;
 
-
 	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
 		bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
+
 		EM_TX_LOCK(txr);
 		em_txeof(txr);
 #ifdef EM_MULTIQUEUE
@@ -1589,13 +1597,13 @@ em_msix_tx(void *arg)
 	EM_TX_LOCK(txr);
 	em_txeof(txr);
 #ifdef EM_MULTIQUEUE
-	if (((txr->queue_status & EM_QUEUE_DEPLETED) == 0) &&
-	    !drbr_empty(ifp, txr->br))
+	if (!drbr_empty(ifp, txr->br))
 		em_mq_start_locked(ifp, txr);
 #else
 	if (!if_sendq_empty(ifp))
 		em_start_locked(ifp, txr);
 #endif
+
 	/* Reenable this interrupt */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims);
 	EM_TX_UNLOCK(txr);
@@ -1621,9 +1629,10 @@ em_msix_rx(void *arg)
 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
 	if (more)
 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
-	else
+	else {
 		/* Reenable this interrupt */
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+	}
 	return;
 }
 
@@ -1660,9 +1669,10 @@ em_handle_rx(void *context, int pending)
 	more = em_rxeof(rxr, adapter->rx_process_limit, NULL);
 	if (more)
 		taskqueue_enqueue(rxr->tq, &rxr->rx_task);
-	else
+	else {
 		/* Reenable this interrupt */
 		E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims);
+	}
 }
 
 static void
@@ -2253,7 +2263,6 @@ em_local_timer(void *arg)
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct rx_ring	*rxr = adapter->rx_rings;
 	u32		trigger;
-	int		hung = 0, busy = 0;
 
 	EM_CORE_LOCK_ASSERT(adapter);
 
@@ -2277,24 +2286,16 @@ em_local_timer(void *arg)
 	** and the HUNG state will be static if set.
 	*/
 	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
-		if ((txr->queue_status & EM_QUEUE_HUNG) &&
-		    (adapter->pause_frames == 0))
-			++hung;
-		if (txr->queue_status & EM_QUEUE_DEPLETED)
-			++busy;
+		if ((txr->queue_status == EM_QUEUE_HUNG) &&
+		    (adapter->pause_frames == 0)) {
+			em_print_debug_info(adapter);
+			goto hung;
+		}
 		/* Schedule a TX tasklet if needed */
-		if ((txr->queue_status & EM_QUEUE_IDLE) == 0)
+		if (txr->tx_avail <= EM_MAX_SCATTER)
 			taskqueue_enqueue(txr->tq, &txr->tx_task);
 	}
 	
-	if (hung == adapter->num_tx_queues)
-		goto timeout;
-	if (busy == adapter->num_tx_queues)
-		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
-	else if ((if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) &&
-		    (busy < adapter->num_tx_queues))
-			if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
-
 	adapter->pause_frames = 0;
 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
 #ifndef DEVICE_POLLING
@@ -2302,7 +2303,7 @@ em_local_timer(void *arg)
 	E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger);
 #endif
 	return;
-timeout:
+hung:
 	/* Looks like we're hung */
 	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
 	device_printf(adapter->dev,
@@ -2312,11 +2313,16 @@ timeout:
 	device_printf(adapter->dev,"TX(%d) desc avail = %d,"
 	    "Next TX to Clean = %d\n",
 	    txr->me, txr->tx_avail, txr->next_to_clean);
+
+
+	em_print_debug_info(adapter);
 	if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
 	adapter->watchdog_events++;
+	adapter->pause_frames = 0;
 	em_init_locked(adapter);
 }
 
+
 static void
 em_update_link_status(struct adapter *adapter)
 {
@@ -2381,11 +2387,10 @@ em_update_link_status(struct adapter *ad
 		if (bootverbose)
 			device_printf(dev, "Link is Down\n");
 		adapter->link_active = 0;
-		/* This can sleep */
-		if_link_state_change(ifp, LINK_STATE_DOWN);
-		/* Reset queue state */
+		/* Link down, disable watchdog */
 		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
 			txr->queue_status = EM_QUEUE_IDLE;
+		if_link_state_change(ifp, LINK_STATE_DOWN);
 	}
 }
 
@@ -2553,13 +2558,14 @@ em_allocate_msix(struct adapter *adapter
 	struct		tx_ring *txr = adapter->tx_rings;
 	struct		rx_ring *rxr = adapter->rx_rings;
 	int		error, rid, vector = 0;
+	int		cpu_id = 0;
 
 
 	/* Make sure all interrupts are disabled */
 	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
 
 	/* First set up ring resources */
-	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
+	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++, vector++) {
 
 		/* RX ring */
 		rid = vector + 1;
@@ -2581,12 +2587,18 @@ em_allocate_msix(struct adapter *adapter
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i);
 #endif
-		rxr->msix = vector++; /* NOTE increment vector for TX */
+		rxr->msix = vector;
+
+		if (em_last_bind_cpu < 0)
+			em_last_bind_cpu = CPU_FIRST();
+		cpu_id = em_last_bind_cpu;
+		bus_bind_intr(dev, rxr->res, cpu_id);
+
 		TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr);
 		rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT,
 		    taskqueue_thread_enqueue, &rxr->tq);
 		taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (qid %d)",
-		    device_get_nameunit(adapter->dev), i);
+		    device_get_nameunit(adapter->dev), cpu_id);
 		/*
 		** Set the bit to enable interrupt
 		** in E1000_IMS -- bits 20 and 21
@@ -2595,9 +2607,11 @@ em_allocate_msix(struct adapter *adapter
 		*/
 		rxr->ims = 1 << (20 + i);
 		adapter->ivars |= (8 | rxr->msix) << (i * 4);
+
+		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
 	}
 
-	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
+	for (int i = 0; i < adapter->num_tx_queues; i++, txr++, vector++) {
 		/* TX ring */
 		rid = vector + 1;
 		txr->res = bus_alloc_resource_any(dev,
@@ -2617,12 +2631,18 @@ em_allocate_msix(struct adapter *adapter
 #if __FreeBSD_version >= 800504
 		bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i);
 #endif
-		txr->msix = vector++; /* Increment vector for next pass */
+		txr->msix = vector;
+
+                if (em_last_bind_cpu < 0)
+                        em_last_bind_cpu = CPU_FIRST();
+                cpu_id = em_last_bind_cpu;
+                bus_bind_intr(dev, txr->res, cpu_id);
+
 		TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr);
 		txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT,
 		    taskqueue_thread_enqueue, &txr->tq);
 		taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (qid %d)",
-		    device_get_nameunit(adapter->dev), i);
+		    device_get_nameunit(adapter->dev), cpu_id);
 		/*
 		** Set the bit to enable interrupt
 		** in E1000_IMS -- bits 22 and 23
@@ -2631,12 +2651,14 @@ em_allocate_msix(struct adapter *adapter
 		*/
 		txr->ims = 1 << (22 + i);
 		adapter->ivars |= (8 | txr->msix) << (8 + (i * 4));
+
+		em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu);
 	}
 
 	/* Link interrupt */
-	++rid;
+	rid = vector + 1;
 	adapter->res = bus_alloc_resource_any(dev,
-	    SYS_RES_IRQ, &rid, RF_ACTIVE);
+	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
 	if (!adapter->res) {
 		device_printf(dev,"Unable to allocate "
 		    "bus resource: Link interrupt [%d]\n", rid);
@@ -2652,7 +2674,7 @@ em_allocate_msix(struct adapter *adapter
 		return (error);
 	}
 #if __FreeBSD_version >= 800504
-		bus_describe_intr(dev, adapter->res, adapter->tag, "link");
+	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
 #endif
 	adapter->linkvec = vector;
 	adapter->ivars |=  (8 | vector) << 16;
@@ -3423,6 +3445,7 @@ em_setup_transmit_ring(struct tx_ring *t
 
 	/* Set number of descriptors available */
 	txr->tx_avail = adapter->num_tx_desc;
+	txr->queue_status = EM_QUEUE_IDLE;
 
 	/* Clear checksum offload context. */
 	txr->last_hw_offload = 0;
@@ -3462,7 +3485,7 @@ em_initialize_transmit_unit(struct adapt
 {
 	struct tx_ring	*txr = adapter->tx_rings;
 	struct e1000_hw	*hw = &adapter->hw;
-	u32	tctl, tarc, tipg = 0;
+	u32	tctl, txdctl = 0, tarc, tipg = 0;
 
 	 INIT_DEBUGOUT("em_initialize_transmit_unit: begin");
 
@@ -3484,6 +3507,12 @@ em_initialize_transmit_unit(struct adapt
 		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
 
 		txr->queue_status = EM_QUEUE_IDLE;
+		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
+                txdctl |= 0x1f; /* PTHRESH */
+                txdctl |= 1 << 8; /* HTHRESH */
+                txdctl |= 1 << 16;/* WTHRESH */
+                txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 
+                E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
 	}
 
 	/* Set the default values for the Tx Inter Packet Gap timer */
@@ -3943,10 +3972,18 @@ em_txeof(struct tx_ring *txr)
 	** will examine this and do a reset if needed.
 	*/
 	if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG))
-		txr->queue_status |= EM_QUEUE_HUNG;
+		txr->queue_status = EM_QUEUE_HUNG;
 
-	if (txr->tx_avail >= (adapter->num_tx_desc / 8))
-		txr->queue_status &= ~EM_QUEUE_DEPLETED;
+        /*
+         * If we have a minimum free, clear IFF_DRV_OACTIVE
+         * to tell the stack that it is OK to send packets.
+	 * Notice that all writes of OACTIVE happen under the
+	 * TX lock which, with a single queue, guarantees 
+	 * sanity.
+         */
+        if (txr->tx_avail >= EM_MAX_SCATTER) {
+		if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
+	}
 
 	/* Disable watchdog if all clean */
 	if (txr->tx_avail == adapter->num_tx_desc) {
@@ -4371,7 +4408,7 @@ em_initialize_receive_unit(struct adapte
 		reta = 0;
 		for (i = 0; i < 4; ++i) {
 			uint32_t q;
-			q = (i % 2) << 7;
+			q = (i % adapter->num_rx_queues) << 7;
 			reta |= q << (8 * i);
 		}
 		for (i = 0; i < 32; ++i)
@@ -4419,7 +4456,14 @@ em_initialize_receive_unit(struct adapte
 #endif /* DEV_NETMAP */
 		E1000_WRITE_REG(hw, E1000_RDT(i), rdt);
 	}
-	/* Set PTHRESH for improved jumbo performance */
+
+	/*
+	 * Set PTHRESH for improved jumbo performance
+	 * According to 10.2.5.11 of Intel 82574 Datasheet,
+	 * RXDCTL(1) is written whenever RXDCTL(0) is written.
+	 * Only write to RXDCTL(1) if there is a need for different
+	 * settings.
+	 */
 	if (((adapter->hw.mac.type == e1000_ich9lan) ||
 	    (adapter->hw.mac.type == e1000_pch2lan) ||
 	    (adapter->hw.mac.type == e1000_ich10lan)) &&
@@ -5860,21 +5904,25 @@ em_print_debug_info(struct adapter *adap
 	else
 		printf("and ACTIVE\n");
 
-	device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
-	    E1000_READ_REG(&adapter->hw, E1000_TDH(0)),
-	    E1000_READ_REG(&adapter->hw, E1000_TDT(0)));
-	device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
-	    E1000_READ_REG(&adapter->hw, E1000_RDH(0)),
-	    E1000_READ_REG(&adapter->hw, E1000_RDT(0)));
-	device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
-	device_printf(dev, "TX descriptors avail = %d\n",
-	    txr->tx_avail);
-	device_printf(dev, "Tx Descriptors avail failure = %ld\n",
-	    txr->no_desc_avail);
-	device_printf(dev, "RX discarded packets = %ld\n",
-	    rxr->rx_discarded);
-	device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
-	device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
+		device_printf(dev, "hw tdh = %d, hw tdt = %d\n",
+	    		E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
+	    		E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
+		device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status);
+		device_printf(dev, "TX descriptors avail = %d\n",
+	    		txr->tx_avail);
+		device_printf(dev, "Tx Descriptors avail failure = %ld\n",
+	    		txr->no_desc_avail);
+	}
+	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
+		device_printf(dev, "hw rdh = %d, hw rdt = %d\n",
+	    		E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
+	    		E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
+		device_printf(dev, "RX discarded packets = %ld\n",
+	    		rxr->rx_discarded);
+		device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check);
+		device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh);
+	}
 }
 
 #ifdef EM_MULTIQUEUE

Modified: projects/em_mq/sys/dev/e1000/if_em.h
==============================================================================
--- projects/em_mq/sys/dev/e1000/if_em.h	Thu Apr 16 06:12:25 2015	(r281589)
+++ projects/em_mq/sys/dev/e1000/if_em.h	Thu Apr 16 06:23:58 2015	(r281590)
@@ -191,7 +191,6 @@
 #define EM_QUEUE_IDLE			0
 #define EM_QUEUE_WORKING		1
 #define EM_QUEUE_HUNG			2
-#define EM_QUEUE_DEPLETED		4
 
 /*
  * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201504160623.t3G6NxEf062304>