Date: Thu, 16 Apr 2015 06:23:59 +0000 (UTC) From: Sean Bruno <sbruno@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r281590 - projects/em_mq/sys/dev/e1000 Message-ID: <201504160623.t3G6NxEf062304@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: sbruno Date: Thu Apr 16 06:23:58 2015 New Revision: 281590 URL: https://svnweb.freebsd.org/changeset/base/281590 Log: Get 82574 tx queues working reliably via a few bits of stolen errata and a few bits of stolen igb(4) code. - Bind interrupts to specific CPUs (stolen from igb). - Don't ever let em_mq_start() have a negative value for its tx queue. - Allow the interrupt for the link status to be shareable. - Setup 2nd queue TXDCTL bits to enable it fully, without this, it times out. Values derived from linux e1000e driver, netdev.c. Committed due to apparent erratum. - When configuring RSS, use a modulo of the num_rx_queues, instead of magic number 2 - Add premptive comment when configuring RXDCTL indicating why the 2nd queue is not being configured here. - Revert enhanced queue status handling, from svn revision 281045 as it fails to handle watchdog events at all in this driver. Modified: projects/em_mq/sys/dev/e1000/if_em.c projects/em_mq/sys/dev/e1000/if_em.h Modified: projects/em_mq/sys/dev/e1000/if_em.c ============================================================================== --- projects/em_mq/sys/dev/e1000/if_em.c Thu Apr 16 06:12:25 2015 (r281589) +++ projects/em_mq/sys/dev/e1000/if_em.c Thu Apr 16 06:23:58 2015 (r281590) @@ -53,6 +53,7 @@ #include <sys/mbuf.h> #include <sys/module.h> #include <sys/rman.h> +#include <sys/smp.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/sysctl.h> @@ -401,6 +402,13 @@ SYSCTL_INT(_hw_em, OID_AUTO, num_tx_queu static int em_num_rx_queues = 1; SYSCTL_INT(_hw_em, OID_AUTO, num_rx_queues, CTLFLAG_RDTUN, &em_num_rx_queues, 0, "82574 only: Number of rx queues to configure, 0 indicates autoconfigure"); + +/* +** Global variable to store last used CPU when binding queues +** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a +** queue is bound to a cpu. +*/ +static int em_last_bind_cpu = -1; #endif /* How many packets rxeof tries to clean at a time */ @@ -899,8 +907,7 @@ em_resume(device_t dev) for (int i = 0; i < adapter->num_tx_queues; i++, txr++) { EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE - if (((txr->queue_status & EM_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) + if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) @@ -958,7 +965,7 @@ em_start_locked(if_t ifp, struct tx_ring /* Set timeout in case hardware has problems transmitting. */ txr->watchdog_time = ticks; - txr->queue_status |= EM_QUEUE_WORKING; + txr->queue_status = EM_QUEUE_WORKING; } return; @@ -994,7 +1001,7 @@ em_mq_start(if_t ifp, struct mbuf *m) { struct adapter *adapter = if_getsoftc(ifp); struct tx_ring *txr = adapter->tx_rings; - int i, error; + unsigned int i, error; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) i = m->m_pkthdr.flowid % adapter->num_tx_queues; @@ -1058,14 +1065,15 @@ em_mq_start_locked(if_t ifp, struct tx_r if (enq > 0) { /* Set the watchdog */ - txr->queue_status |= EM_QUEUE_WORKING; + txr->queue_status = EM_QUEUE_WORKING; txr->watchdog_time = ticks; } - if (txr->tx_avail < (adapter->num_tx_desc / 8)) - em_txeof(txr); if (txr->tx_avail < EM_MAX_SCATTER) - txr->queue_status |= EM_QUEUE_DEPLETED; + em_txeof(txr); + if (txr->tx_avail < EM_MAX_SCATTER) { + if_setdrvflagbits(ifp, IFF_DRV_OACTIVE,0); + } return (err); } @@ -1549,9 +1557,9 @@ em_handle_que(void *context, int pending struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); + EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE @@ -1589,13 +1597,13 @@ em_msix_tx(void *arg) EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE - if (((txr->queue_status & EM_QUEUE_DEPLETED) == 0) && - !drbr_empty(ifp, txr->br)) + if (!drbr_empty(ifp, txr->br)) em_mq_start_locked(ifp, txr); #else if (!if_sendq_empty(ifp)) em_start_locked(ifp, txr); #endif + /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); @@ -1621,9 +1629,10 @@ em_msix_rx(void *arg) more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); - else + else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); + } return; } @@ -1660,9 +1669,10 @@ em_handle_rx(void *context, int pending) more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); - else + else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); + } } static void @@ -2253,7 +2263,6 @@ em_local_timer(void *arg) struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; u32 trigger; - int hung = 0, busy = 0; EM_CORE_LOCK_ASSERT(adapter); @@ -2277,24 +2286,16 @@ em_local_timer(void *arg) ** and the HUNG state will be static if set. */ for (int i = 0; i < adapter->num_tx_queues; i++, txr++) { - if ((txr->queue_status & EM_QUEUE_HUNG) && - (adapter->pause_frames == 0)) - ++hung; - if (txr->queue_status & EM_QUEUE_DEPLETED) - ++busy; + if ((txr->queue_status == EM_QUEUE_HUNG) && + (adapter->pause_frames == 0)) { + em_print_debug_info(adapter); + goto hung; + } /* Schedule a TX tasklet if needed */ - if ((txr->queue_status & EM_QUEUE_IDLE) == 0) + if (txr->tx_avail <= EM_MAX_SCATTER) taskqueue_enqueue(txr->tq, &txr->tx_task); } - if (hung == adapter->num_tx_queues) - goto timeout; - if (busy == adapter->num_tx_queues) - if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0); - else if ((if_getdrvflags(adapter->ifp) & IFF_DRV_OACTIVE) && - (busy < adapter->num_tx_queues)) - if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); - adapter->pause_frames = 0; callout_reset(&adapter->timer, hz, em_local_timer, adapter); #ifndef DEVICE_POLLING @@ -2302,7 +2303,7 @@ em_local_timer(void *arg) E1000_WRITE_REG(&adapter->hw, E1000_ICS, trigger); #endif return; -timeout: +hung: /* Looks like we're hung */ device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); device_printf(adapter->dev, @@ -2312,11 +2313,16 @@ timeout: device_printf(adapter->dev,"TX(%d) desc avail = %d," "Next TX to Clean = %d\n", txr->me, txr->tx_avail, txr->next_to_clean); + + + em_print_debug_info(adapter); if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); adapter->watchdog_events++; + adapter->pause_frames = 0; em_init_locked(adapter); } + static void em_update_link_status(struct adapter *adapter) { @@ -2381,11 +2387,10 @@ em_update_link_status(struct adapter *ad if (bootverbose) device_printf(dev, "Link is Down\n"); adapter->link_active = 0; - /* This can sleep */ - if_link_state_change(ifp, LINK_STATE_DOWN); - /* Reset queue state */ + /* Link down, disable watchdog */ for (int i = 0; i < adapter->num_tx_queues; i++, txr++) txr->queue_status = EM_QUEUE_IDLE; + if_link_state_change(ifp, LINK_STATE_DOWN); } } @@ -2553,13 +2558,14 @@ em_allocate_msix(struct adapter *adapter struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; int error, rid, vector = 0; + int cpu_id = 0; /* Make sure all interrupts are disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* First set up ring resources */ - for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) { + for (int i = 0; i < adapter->num_rx_queues; i++, rxr++, vector++) { /* RX ring */ rid = vector + 1; @@ -2581,12 +2587,18 @@ em_allocate_msix(struct adapter *adapter #if __FreeBSD_version >= 800504 bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i); #endif - rxr->msix = vector++; /* NOTE increment vector for TX */ + rxr->msix = vector; + + if (em_last_bind_cpu < 0) + em_last_bind_cpu = CPU_FIRST(); + cpu_id = em_last_bind_cpu; + bus_bind_intr(dev, rxr->res, cpu_id); + TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, taskqueue_thread_enqueue, &rxr->tq); taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (qid %d)", - device_get_nameunit(adapter->dev), i); + device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 20 and 21 @@ -2595,9 +2607,11 @@ em_allocate_msix(struct adapter *adapter */ rxr->ims = 1 << (20 + i); adapter->ivars |= (8 | rxr->msix) << (i * 4); + + em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } - for (int i = 0; i < adapter->num_tx_queues; i++, txr++) { + for (int i = 0; i < adapter->num_tx_queues; i++, txr++, vector++) { /* TX ring */ rid = vector + 1; txr->res = bus_alloc_resource_any(dev, @@ -2617,12 +2631,18 @@ em_allocate_msix(struct adapter *adapter #if __FreeBSD_version >= 800504 bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i); #endif - txr->msix = vector++; /* Increment vector for next pass */ + txr->msix = vector; + + if (em_last_bind_cpu < 0) + em_last_bind_cpu = CPU_FIRST(); + cpu_id = em_last_bind_cpu; + bus_bind_intr(dev, txr->res, cpu_id); + TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (qid %d)", - device_get_nameunit(adapter->dev), i); + device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 22 and 23 @@ -2631,12 +2651,14 @@ em_allocate_msix(struct adapter *adapter */ txr->ims = 1 << (22 + i); adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); + + em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } /* Link interrupt */ - ++rid; + rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_ACTIVE); + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate " "bus resource: Link interrupt [%d]\n", rid); @@ -2652,7 +2674,7 @@ em_allocate_msix(struct adapter *adapter return (error); } #if __FreeBSD_version >= 800504 - bus_describe_intr(dev, adapter->res, adapter->tag, "link"); + bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; adapter->ivars |= (8 | vector) << 16; @@ -3423,6 +3445,7 @@ em_setup_transmit_ring(struct tx_ring *t /* Set number of descriptors available */ txr->tx_avail = adapter->num_tx_desc; + txr->queue_status = EM_QUEUE_IDLE; /* Clear checksum offload context. */ txr->last_hw_offload = 0; @@ -3462,7 +3485,7 @@ em_initialize_transmit_unit(struct adapt { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; - u32 tctl, tarc, tipg = 0; + u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); @@ -3484,6 +3507,12 @@ em_initialize_transmit_unit(struct adapt E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txr->queue_status = EM_QUEUE_IDLE; + txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i)); + txdctl |= 0x1f; /* PTHRESH */ + txdctl |= 1 << 8; /* HTHRESH */ + txdctl |= 1 << 16;/* WTHRESH */ + txdctl |= E1000_TXDCTL_QUEUE_ENABLE; + E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ @@ -3943,10 +3972,18 @@ em_txeof(struct tx_ring *txr) ** will examine this and do a reset if needed. */ if ((!processed) && ((ticks - txr->watchdog_time) > EM_WATCHDOG)) - txr->queue_status |= EM_QUEUE_HUNG; + txr->queue_status = EM_QUEUE_HUNG; - if (txr->tx_avail >= (adapter->num_tx_desc / 8)) - txr->queue_status &= ~EM_QUEUE_DEPLETED; + /* + * If we have a minimum free, clear IFF_DRV_OACTIVE + * to tell the stack that it is OK to send packets. + * Notice that all writes of OACTIVE happen under the + * TX lock which, with a single queue, guarantees + * sanity. + */ + if (txr->tx_avail >= EM_MAX_SCATTER) { + if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE); + } /* Disable watchdog if all clean */ if (txr->tx_avail == adapter->num_tx_desc) { @@ -4371,7 +4408,7 @@ em_initialize_receive_unit(struct adapte reta = 0; for (i = 0; i < 4; ++i) { uint32_t q; - q = (i % 2) << 7; + q = (i % adapter->num_rx_queues) << 7; reta |= q << (8 * i); } for (i = 0; i < 32; ++i) @@ -4419,7 +4456,14 @@ em_initialize_receive_unit(struct adapte #endif /* DEV_NETMAP */ E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } - /* Set PTHRESH for improved jumbo performance */ + + /* + * Set PTHRESH for improved jumbo performance + * According to 10.2.5.11 of Intel 82574 Datasheet, + * RXDCTL(1) is written whenever RXDCTL(0) is written. + * Only write to RXDCTL(1) if there is a need for different + * settings. + */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && @@ -5860,21 +5904,25 @@ em_print_debug_info(struct adapter *adap else printf("and ACTIVE\n"); - device_printf(dev, "hw tdh = %d, hw tdt = %d\n", - E1000_READ_REG(&adapter->hw, E1000_TDH(0)), - E1000_READ_REG(&adapter->hw, E1000_TDT(0))); - device_printf(dev, "hw rdh = %d, hw rdt = %d\n", - E1000_READ_REG(&adapter->hw, E1000_RDH(0)), - E1000_READ_REG(&adapter->hw, E1000_RDT(0))); - device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status); - device_printf(dev, "TX descriptors avail = %d\n", - txr->tx_avail); - device_printf(dev, "Tx Descriptors avail failure = %ld\n", - txr->no_desc_avail); - device_printf(dev, "RX discarded packets = %ld\n", - rxr->rx_discarded); - device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); - device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); + for (int i = 0; i < adapter->num_tx_queues; i++, txr++) { + device_printf(dev, "hw tdh = %d, hw tdt = %d\n", + E1000_READ_REG(&adapter->hw, E1000_TDH(i)), + E1000_READ_REG(&adapter->hw, E1000_TDT(i))); + device_printf(dev, "Tx Queue Status = %d\n", txr->queue_status); + device_printf(dev, "TX descriptors avail = %d\n", + txr->tx_avail); + device_printf(dev, "Tx Descriptors avail failure = %ld\n", + txr->no_desc_avail); + } + for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) { + device_printf(dev, "hw rdh = %d, hw rdt = %d\n", + E1000_READ_REG(&adapter->hw, E1000_RDH(i)), + E1000_READ_REG(&adapter->hw, E1000_RDT(i))); + device_printf(dev, "RX discarded packets = %ld\n", + rxr->rx_discarded); + device_printf(dev, "RX Next to Check = %d\n", rxr->next_to_check); + device_printf(dev, "RX Next to Refresh = %d\n", rxr->next_to_refresh); + } } #ifdef EM_MULTIQUEUE Modified: projects/em_mq/sys/dev/e1000/if_em.h ============================================================================== --- projects/em_mq/sys/dev/e1000/if_em.h Thu Apr 16 06:12:25 2015 (r281589) +++ projects/em_mq/sys/dev/e1000/if_em.h Thu Apr 16 06:23:58 2015 (r281590) @@ -191,7 +191,6 @@ #define EM_QUEUE_IDLE 0 #define EM_QUEUE_WORKING 1 #define EM_QUEUE_HUNG 2 -#define EM_QUEUE_DEPLETED 4 /* * TDBA/RDBA should be aligned on 16 byte boundary. But TDLEN/RDLEN should be
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201504160623.t3G6NxEf062304>