Date: Mon, 15 Oct 2012 09:04:27 -0400
From: John Baldwin <jhb@freebsd.org>
To: freebsd-net@freebsd.org
Cc: Luigi Rizzo <rizzo@iet.unipi.it>, "Alexander V. Chernikov" <melifaro@freebsd.org>, Jack Vogel <jfvogel@gmail.com>, net@freebsd.org
Subject: Re: ixgbe & if_igb RX ring locking
Message-ID: <201210150904.27567.jhb@freebsd.org>
In-Reply-To: <507C1960.6050500@FreeBSD.org>
References: <5079A9A1.4070403@FreeBSD.org> <CAFOYbc=N87_OECto7B8jdzmRZA-yoa_JWgvVc8kwpK9umO97rQ@mail.gmail.com> <507C1960.6050500@FreeBSD.org>
On Monday, October 15, 2012 10:10:40 am Alexander V. Chernikov wrote:
> On 13.10.2012 23:24, Jack Vogel wrote:
> > On Sat, Oct 13, 2012 at 11:22 AM, Luigi Rizzo<rizzo@iet.unipi.it> wrote:
>
> >>
> >> one option could be (same as it is done in the timer
> >> routine in dummynet) to build a list of all the packets
> >> that need to be sent to if_input(), and then call
> >> if_input with the entire list outside the lock.
> >>
> >> It would be even easier if we modify the various *_input()
> >> routines to handle a list of mbufs instead of just one.
>
> Bulk processing is generally a good idea we probably should implement.
> Probably starting from driver queue ending with marked mbufs
> (OURS/forward/legacy processing (appletalk and similar))?
>
> This can minimize an impact for all
> locks on RX side:
> L2
> * rx PFIL hook
> L3 (both IPv4 and IPv6)
> * global IF_ADDR_RLOCK (currently commented out)
> * Per-interface ADDR_RLOCK
> * PFIL hook
>
> From the first glance, there can be problems with:
> * Increased latency (we should have some kind of rx_process_limit), but
> still
> * reader locks being acquired for much longer amount of time
>
> >>
> >> cheers
> >> luigi
> >>
> >> Very interesting idea Luigi, will have to get that some thought.
> >
> > Jack
>
> Returning to original post topic:
>
> Given
> 1) we are currently binding ixgbe ithreads to CPU cores
> 2) RX queue lock is used by (indirectly) in only 2 places:
> a) ISR routine (msix or legacy irq)
> b) taskqueue routine which is scheduled if some packets remains in RX
> queue and rx_process_limit ended OR we need something to TX
>
> 3) in practice taskqueue routine is a nightmare for many people since
> there is no way to stop "kernel {ix0 que}" thread eating 100% cpu after
> some traffic burst happens: once it is called it starts to schedule
> itself more and more replacing original ISR routine. Additionally,
> increasing rx_process_limit does not help since taskqueue is called with
> the same limit. Finally, currently netisr taskq threads are not bound to
> any CPU which makes the process even more uncontrollable.

I think part of the problem here is that the taskqueue in ixgbe(4) is
bogusly rescheduled for TX handling.  Instead, ixgbe_msix_que() should
just start transmitting packets directly.  I fixed this in igb(4) here:

http://svnweb.freebsd.org/base?view=revision&revision=233708

You can try this for ixgbe(4).  It also comments out a spurious taskqueue
reschedule from the watchdog handler that might also lower the taskqueue
usage.
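For what it's worth, a rough and untested sketch of the deferred if_input()
dispatch Luigi describes above could look something like the fragment below;
the helper name is invented purely for illustration:

/*
 * Hypothetical sketch only: rxeof would link completed packets through
 * m_nextpkt while the RX lock is held, then hand the whole list to this
 * helper once the lock has been dropped.
 */
static void
deferred_if_input(struct ifnet *ifp, struct mbuf *head)
{
        struct mbuf *m, *next;

        for (m = head; m != NULL; m = next) {
                next = m->m_nextpkt;
                m->m_nextpkt = NULL;
                /* The stack runs here without the RX ring lock held. */
                (*ifp->if_input)(ifp, m);
        }
}

ixgbe_rxeof() would then just append each completed packet to a local
m_nextpkt list under IXGBE_RX_LOCK() and make a single call to the helper
after IXGBE_RX_UNLOCK(), instead of dropping and retaking the ring lock
around every individual if_input() call.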
You can try changing that #if 0 to an #if 1 to test just the txeof changes:

Index: ixgbe.c
===================================================================
--- ixgbe.c     (revision 241579)
+++ ixgbe.c     (working copy)
@@ -149,7 +149,7 @@
 static void     ixgbe_enable_intr(struct adapter *);
 static void     ixgbe_disable_intr(struct adapter *);
 static void     ixgbe_update_stats_counters(struct adapter *);
-static bool     ixgbe_txeof(struct tx_ring *);
+static void     ixgbe_txeof(struct tx_ring *);
 static bool     ixgbe_rxeof(struct ix_queue *, int);
 static void     ixgbe_rx_checksum(u32, struct mbuf *, u32);
 static void     ixgbe_set_promisc(struct adapter *);
@@ -1439,8 +1439,9 @@
         struct adapter *adapter = que->adapter;
         struct ixgbe_hw *hw = &adapter->hw;
         struct tx_ring *txr = adapter->tx_rings;
-        bool more_tx, more_rx;
-        u32 reg_eicr, loop = MAX_LOOP;
+        struct ifnet *ifp = adapter->ifp;
+        bool more_rx;
+        u32 reg_eicr;


         reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
@@ -1454,14 +1455,16 @@
         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);

         IXGBE_TX_LOCK(txr);
-        do {
-                more_tx = ixgbe_txeof(txr);
-        } while (loop-- && more_tx);
+        ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+        if (!drbr_empty(ifp, txr->br))
+                ixgbe_mq_start_locked(ifp, txr, NULL);
+#else
+        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+                ixgbe_start_locked(txr, ifp);
+#endif
         IXGBE_TX_UNLOCK(txr);

-        if (more_rx || more_tx)
-                taskqueue_enqueue(que->tq, &que->que_task);
-
         /* Check for fan failure */
         if ((hw->phy.media_type == ixgbe_media_type_copper) &&
             (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
@@ -1474,7 +1477,10 @@
         if (reg_eicr & IXGBE_EICR_LSC)
                 taskqueue_enqueue(adapter->tq, &adapter->link_task);

-        ixgbe_enable_intr(adapter);
+        if (more_rx)
+                taskqueue_enqueue(que->tq, &que->que_task);
+        else
+                ixgbe_enable_intr(adapter);
         return;
 }

@@ -1491,7 +1497,8 @@
         struct adapter *adapter = que->adapter;
         struct tx_ring *txr = que->txr;
         struct rx_ring *rxr = que->rxr;
-        bool more_tx, more_rx;
+        struct ifnet *ifp = adapter->ifp;
+        bool more_rx;
         u32 newitr = 0;

         ixgbe_disable_queue(adapter, que->msix);
@@ -1500,18 +1507,14 @@
         more_rx = ixgbe_rxeof(que, adapter->rx_process_limit);

         IXGBE_TX_LOCK(txr);
-        more_tx = ixgbe_txeof(txr);
-        /*
-        ** Make certain that if the stack
-        ** has anything queued the task gets
-        ** scheduled to handle it.
-        */
-#if __FreeBSD_version < 800000
-        if (!IFQ_DRV_IS_EMPTY(&adapter->ifp->if_snd))
+        ixgbe_txeof(txr);
+#if __FreeBSD_version >= 800000
+        if (!drbr_empty(ifp, txr->br))
+                ixgbe_mq_start_locked(ifp, txr, NULL);
 #else
-        if (!drbr_empty(adapter->ifp, txr->br))
+        if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+                ixgbe_start_locked(txr, ifp);
 #endif
-                more_tx = 1;
         IXGBE_TX_UNLOCK(txr);

         /* Do AIM now? */
@@ -1565,7 +1568,7 @@
                 rxr->packets = 0;

 no_calc:
-        if (more_tx || more_rx)
+        if (more_rx)
                 taskqueue_enqueue(que->tq, &que->que_task);
         else /* Reenable this interrupt */
                 ixgbe_enable_queue(adapter, que->msix);
@@ -2049,8 +2052,10 @@
                         ++hung;
                 if (txr->queue_status & IXGBE_QUEUE_DEPLETED)
                         ++busy;
+#if 0
                 if ((txr->queue_status & IXGBE_QUEUE_IDLE) == 0)
                         taskqueue_enqueue(que->tq, &que->que_task);
+#endif
         }
         /* Only truely watchdog if all queues show hung */
         if (hung == adapter->num_queues)
@@ -3548,7 +3556,7 @@
  *  tx_buffer is put back on the free queue.
  *
  **********************************************************************/
-static bool
+static void
 ixgbe_txeof(struct tx_ring *txr)
 {
         struct adapter *adapter = txr->adapter;
@@ -3597,13 +3605,13 @@
                 IXGBE_CORE_UNLOCK(adapter);
                 IXGBE_TX_LOCK(txr);
                 }
-                return FALSE;
+                return;
         }
 #endif /* DEV_NETMAP */

         if (txr->tx_avail == adapter->num_tx_desc) {
                 txr->queue_status = IXGBE_QUEUE_IDLE;
-                return FALSE;
+                return;
         }

         processed = 0;
@@ -3613,7 +3621,7 @@
         tx_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[first];
         last = tx_buffer->eop_index;
         if (last == -1)
-                return FALSE;
+                return;
         eop_desc = (struct ixgbe_legacy_tx_desc *)&txr->tx_base[last];

         /*
@@ -3693,12 +3701,8 @@
         if (txr->tx_avail > IXGBE_TX_CLEANUP_THRESHOLD)
                 txr->queue_status &= ~IXGBE_QUEUE_DEPLETED;

-        if (txr->tx_avail == adapter->num_tx_desc) {
+        if (txr->tx_avail == adapter->num_tx_desc)
                 txr->queue_status = IXGBE_QUEUE_IDLE;
-                return (FALSE);
-        }
-
-        return TRUE;
 }

 /*********************************************************************

-- 
John Baldwin