Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 15 Oct 2012 09:04:27 -0400
From:      John Baldwin <jhb@freebsd.org>
To:        freebsd-net@freebsd.org
Cc:        Luigi Rizzo <rizzo@iet.unipi.it>, "Alexander V. Chernikov" <melifaro@freebsd.org>, Jack Vogel <jfvogel@gmail.com>, net@freebsd.org
Subject:   Re: ixgbe & if_igb RX ring locking
Message-ID:  <201210150904.27567.jhb@freebsd.org>
In-Reply-To: <507C1960.6050500@FreeBSD.org>
References:  <5079A9A1.4070403@FreeBSD.org> <CAFOYbc=N87_OECto7B8jdzmRZA-yoa_JWgvVc8kwpK9umO97rQ@mail.gmail.com> <507C1960.6050500@FreeBSD.org>

next in thread | previous in thread | raw e-mail | index | archive | help
On Monday, October 15, 2012 10:10:40 am Alexander V. Chernikov wrote:
> On 13.10.2012 23:24, Jack Vogel wrote:
> > On Sat, Oct 13, 2012 at 11:22 AM, Luigi Rizzo<rizzo@iet.unipi.it>  wrote:
> 
> >>
> >> one option could be (same as it is done in the timer
> >> routine in dummynet) to build a list of all the packets
> >> that need to be sent to if_input(), and then call
> >> if_input with the entire list outside the lock.
> >>
> >> It would be even easier if we modify the various *_input()
> >> routines to handle a list of mbufs instead of just one.
> 
> Bulk processing is generally a good idea we probably should implement.
> Probably starting from driver queue ending with marked mbufs 
> (OURS/forward/legacy processing (appletalk and similar))?
> 
> This can minimize an impact for all
> locks on RX side:
> L2
> * rx PFIL hook
> L3 (both IPv4 and IPv6)
> * global IF_ADDR_RLOCK (currently commented out)
> * Per-interface ADDR_RLOCK
> * PFIL hook
> 
>  From the first glance, there can be problems with:
> * Increased latency (we should have some kind of rx_process_limit), but 
> still
> * reader locks being acquired for much longer amount of time
> 
> >>
> >> cheers
> >> luigi
> >>
> >> Very interesting idea Luigi, will have to get that some thought.
> >
> > Jack
> 
> Returning to original post topic:
> 
> Given
> 1) we are currently binding ixgbe ithreads to CPU cores
> 2) RX queue lock is used by (indirectly) in only 2 places:
> a) ISR routine (msix or legacy irq)
> b) taskqueue routine which is scheduled if some packets remains in RX 
> queue and rx_process_limit ended OR we need something to TX
> 
> 3) in practice taskqueue routine is a nightmare for many people since 
> there is no way to stop "kernel {ix0 que}" thread eating 100% cpu after 
> some traffic burst happens: once it is called it starts to schedule 
> itself more and more replacing original ISR routine. Additionally, 
> increasing rx_process_limit does not help since taskqueue is called with 
> the same limit. Finally, currently netisr taskq threads are not bound to 
> any CPU which makes the process even more uncontrollable.

I think part of the problem here is that the taskqueue in ixgbe(4) is
bogusly rescheduled for TX handling.  Instead, ixgbe_msix_que() should
just start transmitting packets directly.

I fixed this in igb(4) here:

http://svnweb.freebsd.org/base?view=revision&revision=233708

You can try this for ixgbe(4).  It also comments out a spurious taskqueue 
reschedule from the watchdog handler that might also lower the taskqueue 
usage.  You can try changing that #if 0 to an #if 1 to test just the txeof 
changes:

Index: ixgbe.c
===================================================================
--- ixgbe.c	(revision 241579)
+++ ixgbe.c	(working copy)
@@ -149,7 +149,7 @@
 static void     ixgbe_enable_intr(struct adapter *);
 static void     ixgbe_disable_intr(struct adapter *);
 static void     ixgbe_update_stats_counters(struct adapter *);
-static bool	ixgbe_txeof(struct tx_ring *);
+static void	ixgbe_txeof(struct tx_ring *);
 static bool	ixgbe_rxeof(struct ix_queue *, int);
 static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
 static void     ixgbe_set_promisc(struct adapter *);
@@ -1439,8 +1439,9 @@
 	struct adapter	*adapter = que->adapter;
 	struct ixgbe_hw	*hw = &adapter->hw;
 	struct 		tx_ring *txr = adapter->tx_rings;
-	bool		more_tx, more_rx;
-	u32       	reg_eicr, loop = MAX_LOOP;
+	struct ifnet    *ifp = adapter->ifp;
+	bool		more_rx;
+	u32       	reg_eicr;
 
 
 	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
@@ -1454,14 +1455,16 @@
 	more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
 
 	IXGBE_TX_LOCK(txr);
-	do {
-		more_tx = ixgbe_txeof(txr);
-	} while (loop-- && more_tx);
+	ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+	if (!drbr_empty(ifp, txr->br))
+		ixgbe_mq_start_locked(ifp, txr, NULL);
+#else
+	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+		ixgbe_start_locked(txr, ifp);
+#endif
 	IXGBE_TX_UNLOCK(txr);
 
-	if (more_rx || more_tx)
-		taskqueue_enqueue(que->tq, &que->que_task);
-
 	/* Check for fan failure */
 	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
 	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
@@ -1474,7 +1477,10 @@
 	if (reg_eicr & IXGBE_EICR_LSC)
 		taskqueue_enqueue(adapter->tq, &adapter->link_task);
 
-	ixgbe_enable_intr(adapter);
+	if (more_rx)
+		taskqueue_enqueue(que->tq, &que->que_task);
+	else
+		ixgbe_enable_intr(adapter);
 	return;
 }
 
@@ -1491,7 +1497,8 @@
 	struct adapter  *adapter = que->adapter;
 	struct tx_ring	*txr = que->txr;
 	struct rx_ring	*rxr = que->rxr;
-	bool		more_tx, more_rx;
+	struct ifnet    *ifp = adapter->ifp;
+	bool		more_rx;
 	u32		newitr = 0;
 
 	ixgbe_disable_queue(adapter, que->msix);
@@ -1500,18 +1507,14 @@
 	more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);
 
 	IXGBE_TX_LOCK(txr);
-	more_tx = ixgbe_txeof(txr);
-	/*
-	** Make certain that if the stack 
-	** has anything queued the task gets
-	** scheduled to handle it.
-	*/
-#if __FreeBSD_version < 800000
-	if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
+	ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+	if (!drbr_empty(ifp, txr->br))
+		ixgbe_mq_start_locked(ifp, txr, NULL);
 #else
-	if (!drbr_empty(adapter->ifp, txr->br))
+	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+		ixgbe_start_locked(txr, ifp);
 #endif
-		more_tx = 1;
 	IXGBE_TX_UNLOCK(txr);
 
 	/* Do AIM now? */
@@ -1565,7 +1568,7 @@
         rxr->packets = 0;
 
 no_calc:
-	if (more_tx || more_rx)
+	if (more_rx)
 		taskqueue_enqueue(que->tq, &que->que_task);
 	else /* Reenable this interrupt */
 		ixgbe_enable_queue(adapter, que->msix);
@@ -2049,8 +2052,10 @@
 			++hung;
 		if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
 			++busy;
+#if 0
 		if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
 			taskqueue_enqueue(que->tq, &que->que_task);
+#endif
         }
 	/* Only truely watchdog if all queues show hung */
         if (hung == adapter->num_queues)
@@ -3548,7 +3556,7 @@
  *  tx_buffer is put back on the free queue.
  *
  **********************************************************************/
-static bool
+static void
 ixgbe_txeof(struct tx_ring *txr)
 {
 	struct adapter	*adapter = txr->adapter;
@@ -3597,13 +3605,13 @@
 			IXGBE_CORE_UNLOCK(adapter);
 			IXGBE_TX_LOCK(txr);
 		}
-		return FALSE;
+		return;
 	}
 #endif /* DEV_NETMAP */
 
 	if (txr->tx_avail == adapter->num_tx_desc) {
 		txr->queue_status = IXGBE_QUEUE_IDLE;
-		return FALSE;
+		return;
 	}
 
 	processed = 0;
@@ -3613,7 +3621,7 @@
 	tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
 	last = tx_buffer->eop_index;
 	if (last == -1)
-		return FALSE;
+		return;
 	eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];
 
 	/*
@@ -3693,12 +3701,8 @@
 	if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
 		txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;
 
-	if (txr->tx_avail == adapter->num_tx_desc) {
+	if (txr->tx_avail == adapter->num_tx_desc)
 		txr->queue_status = IXGBE_QUEUE_IDLE;
-		return (FALSE);
-	}
-
-	return TRUE;
 }
 
 /*********************************************************************

-- 
John Baldwin



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201210150904.27567.jhb>