Date: Wed, 17 Jun 2015 18:50:58 +0000 (UTC) From: Sean Bruno <sbruno@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r284522 - in stable/10: share/man/man4 sys/conf sys/dev/e1000 sys/dev/netmap sys/modules/em Message-ID: <201506171850.t5HIow3k064693@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: sbruno Date: Wed Jun 17 18:50:57 2015 New Revision: 284522 URL: https://svnweb.freebsd.org/changeset/base/284522 Log: MFC r284179, r283959 Implement multiqueue (max 2 tx/rx queues) for the 82574L chipset. Change default tuning parameters to handle this new configuration if EM_MULTIQUEUE is set in the kernel configuration. Off by default. See r283959 changelog for the scope of these changes. Relnotes: Yes Sponsored by: Limelight Networks Modified: stable/10/share/man/man4/em.4 stable/10/sys/conf/NOTES stable/10/sys/conf/options stable/10/sys/dev/e1000/e1000_defines.h stable/10/sys/dev/e1000/if_em.c stable/10/sys/dev/e1000/if_em.h stable/10/sys/dev/netmap/if_em_netmap.h stable/10/sys/modules/em/Makefile Directory Properties: stable/10/ (props changed) Modified: stable/10/share/man/man4/em.4 ============================================================================== --- stable/10/share/man/man4/em.4 Wed Jun 17 18:22:52 2015 (r284521) +++ stable/10/share/man/man4/em.4 Wed Jun 17 18:50:57 2015 (r284522) @@ -45,6 +45,14 @@ kernel configuration file: .Cd "device em" .Ed .Pp +Optional multiqueue support is available via the following kernel +compile options: +.Bd -ragged -offset indent +.Cd "options EM_MULTIQUEUE" +.Ed +.Pp +Note: Activating EM_MULTIQUEUE support is not supported by Intel. +.Pp Alternatively, to load the driver as a module at boot time, place the following line in .Xr loader.conf 5 : @@ -197,6 +205,18 @@ Tunables can be set at the prompt before booting the kernel or stored in .Xr loader.conf 5 . .Bl -tag -width indent +.It Va hw.em.eee_setting +Disable or enable Energy Efficient Ethernet. +Default 1 (disabled). +.It Va hw.em.msix +Enable or disable MSI-X style interrupts. +Default 1 (enabled). +.It Va hw.em.smart_pwr_down +Enable or disable smart power down features on newer adapters. +Default 0 (disabled). +.It Va hw.em.sbp +Show bad packets when in promiscuous mode. +Default 0 (off). .It Va hw.em.rxd Number of receive descriptors allocated by the driver. The default value is 1024 for adapters newer than 82547, @@ -228,6 +248,11 @@ If .Va hw.em.tx_int_delay is non-zero, this tunable limits the maximum delay in which a transmit interrupt is generated. +.It Va hw.em.num_queues +Number of hardware queues that will be configured on this adapter (maximum of 2) +Defaults to 1. +Only valid with kernel configuration +.Cd "options EM_MULTIQUEUE". .El .Sh FILES .Bl -tag -width /dev/led/em* @@ -287,3 +312,5 @@ You can enable it on an .Nm interface using .Xr ifconfig 8 . +.Pp +Activating EM_MULTIQUEUE support requires MSI-X features. Modified: stable/10/sys/conf/NOTES ============================================================================== --- stable/10/sys/conf/NOTES Wed Jun 17 18:22:52 2015 (r284521) +++ stable/10/sys/conf/NOTES Wed Jun 17 18:50:57 2015 (r284522) @@ -2990,3 +2990,6 @@ options RANDOM_YARROW # Yarrow RNG ##options RANDOM_FORTUNA # Fortuna RNG - not yet implemented options RANDOM_DEBUG # Debugging messages options RANDOM_RWFILE # Read and write entropy cache + +# Intel em(4) driver +options EM_MULTIQUEUE # Activate multiqueue features/disable MSI-X Modified: stable/10/sys/conf/options ============================================================================== --- stable/10/sys/conf/options Wed Jun 17 18:22:52 2015 (r284521) +++ stable/10/sys/conf/options Wed Jun 17 18:50:57 2015 (r284522) @@ -934,3 +934,6 @@ RANDOM_YARROW opt_random.h RANDOM_FORTUNA opt_random.h RANDOM_DEBUG opt_random.h RANDOM_RWFILE opt_random.h + +# Intel em(4) driver +EM_MULTIQUEUE opt_em.h Modified: stable/10/sys/dev/e1000/e1000_defines.h ============================================================================== --- stable/10/sys/dev/e1000/e1000_defines.h Wed Jun 17 18:22:52 2015 (r284521) +++ stable/10/sys/dev/e1000/e1000_defines.h Wed Jun 17 18:50:57 2015 (r284522) @@ -158,11 +158,12 @@ E1000_RXDEXT_STATERR_CXE | \ E1000_RXDEXT_STATERR_RXE) -#define E1000_MRQC_ENABLE_RSS_2Q 0x00000001 +#define E1000_MRQC_RSS_ENABLE_2Q 0x00000001 #define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 #define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 #define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 #define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 +#define E1000_MRQC_RSS_FIELD_IPV6_EX 0x00080000 #define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 #define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 Modified: stable/10/sys/dev/e1000/if_em.c ============================================================================== --- stable/10/sys/dev/e1000/if_em.c Wed Jun 17 18:22:52 2015 (r284521) +++ stable/10/sys/dev/e1000/if_em.c Wed Jun 17 18:50:57 2015 (r284522) @@ -32,6 +32,8 @@ ******************************************************************************/ /*$FreeBSD$*/ +#include "opt_em.h" +#include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" @@ -41,6 +43,10 @@ #include <sys/param.h> #include <sys/systm.h> +#ifdef DDB +#include <sys/types.h> +#include <ddb/ddb.h> +#endif #if __FreeBSD_version >= 800000 #include <sys/buf_ring.h> #endif @@ -52,6 +58,7 @@ #include <sys/mbuf.h> #include <sys/module.h> #include <sys/rman.h> +#include <sys/smp.h> #include <sys/socket.h> #include <sys/sockio.h> #include <sys/sysctl.h> @@ -207,7 +214,7 @@ static int em_resume(device_t); #ifdef EM_MULTIQUEUE static int em_mq_start(struct ifnet *, struct mbuf *); static int em_mq_start_locked(struct ifnet *, - struct tx_ring *, struct mbuf *); + struct tx_ring *); static void em_qflush(struct ifnet *); #else static void em_start(struct ifnet *); @@ -297,6 +304,10 @@ static void em_handle_tx(void *context, static void em_handle_rx(void *context, int pending); static void em_handle_link(void *context, int pending); +#ifdef EM_MULTIQUEUE +static void em_enable_vectors_82574(struct adapter *); +#endif + static void em_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int em_set_flowcntl(SYSCTL_HANDLER_ARGS); @@ -395,6 +406,20 @@ TUNABLE_INT("hw.em.enable_msix", &em_ena SYSCTL_INT(_hw_em, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &em_enable_msix, 0, "Enable MSI-X interrupts"); +#ifdef EM_MULTIQUEUE +static int em_num_queues = 1; +TUNABLE_INT("hw.em.num_queues", &em_num_queues); +SYSCTL_INT(_hw_em, OID_AUTO, num_queues, CTLFLAG_RDTUN, &em_num_queues, 0, + "82574 only: Number of queues to configure, 0 indicates autoconfigure"); +#endif + +/* +** Global variable to store last used CPU when binding queues +** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a +** queue is bound to a cpu. +*/ +static int em_last_bind_cpu = -1; + /* How many packets rxeof tries to clean at a time */ static int em_rx_process_limit = 100; TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit); @@ -429,10 +454,10 @@ static int em_probe(device_t dev) { char adapter_name[60]; - u16 pci_vendor_id = 0; - u16 pci_device_id = 0; - u16 pci_subvendor_id = 0; - u16 pci_subdevice_id = 0; + uint16_t pci_vendor_id = 0; + uint16_t pci_device_id = 0; + uint16_t pci_subvendor_id = 0; + uint16_t pci_subdevice_id = 0; em_vendor_info_t *ent; INIT_DEBUGOUT("em_probe: begin"); @@ -559,6 +584,11 @@ em_attach(device_t dev) goto err_pci; } + /* + * Setup MSI/X or MSI if PCI Express + */ + adapter->msix = em_setup_msix(adapter); + e1000_get_bus_info(hw); /* Set up some sysctls for the tunable interrupt delays */ @@ -890,7 +920,7 @@ em_resume(device_t dev) EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr, NULL); + em_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); @@ -904,7 +934,70 @@ em_resume(device_t dev) } -#ifdef EM_MULTIQUEUE +#ifndef EM_MULTIQUEUE +static void +em_start_locked(struct ifnet *ifp, struct tx_ring *txr) +{ + struct adapter *adapter = ifp->if_softc; + struct mbuf *m_head; + + EM_TX_LOCK_ASSERT(txr); + + if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return; + + if (!adapter->link_active) + return; + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + /* Call cleanup if number of TX descriptors low */ + if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) + em_txeof(txr); + if (txr->tx_avail < EM_MAX_SCATTER) { + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + break; + } + IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); + if (m_head == NULL) + break; + /* + * Encapsulation can modify our pointer, and or make it + * NULL on failure. In that event, we can't requeue. + */ + if (em_xmit(txr, &m_head)) { + if (m_head == NULL) + break; + IFQ_DRV_PREPEND(&ifp->if_snd, m_head); + break; + } + + /* Mark the queue as having work */ + if (txr->busy == EM_TX_IDLE) + txr->busy = EM_TX_BUSY; + + /* Send a copy of the frame to the BPF listener */ + ETHER_BPF_MTAP(ifp, m_head); + + } + + return; +} + +static void +em_start(struct ifnet *ifp) +{ + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + EM_TX_LOCK(txr); + em_start_locked(ifp, txr); + EM_TX_UNLOCK(txr); + } + return; +} +#else /* EM_MULTIQUEUE */ /********************************************************************* * Multiqueue Transmit routines * @@ -913,8 +1006,38 @@ em_resume(device_t dev) * than do an immediate send. It is this that is an advantage * in this driver, rather than also having multiple tx queues. **********************************************************************/ +/* +** Multiqueue capable stack interface +*/ static int -em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m) +em_mq_start(struct ifnet *ifp, struct mbuf *m) +{ + struct adapter *adapter = ifp->if_softc; + struct tx_ring *txr = adapter->tx_rings; + unsigned int i, error; + + if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) + i = m->m_pkthdr.flowid % adapter->num_queues; + else + i = curcpu % adapter->num_queues; + + txr = &adapter->tx_rings[i]; + + error = drbr_enqueue(ifp, txr->br, m); + if (error) + return (error); + + if (EM_TX_TRYLOCK(txr)) { + em_mq_start_locked(ifp, txr); + EM_TX_UNLOCK(txr); + } else + taskqueue_enqueue(txr->tq, &txr->tx_task); + + return (0); +} + +static int +em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr) { struct adapter *adapter = txr->adapter; struct mbuf *next; @@ -922,25 +1045,23 @@ em_mq_start_locked(struct ifnet *ifp, st if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING || adapter->link_active == 0) { - if (m != NULL) - err = drbr_enqueue(ifp, txr->br, m); - return (err); + return (ENETDOWN); } - enq = 0; - if (m != NULL) { - err = drbr_enqueue(ifp, txr->br, m); - if (err) - return (err); - } - /* Process the queue */ while ((next = drbr_peek(ifp, txr->br)) != NULL) { if ((err = em_xmit(txr, &next)) != 0) { - if (next == NULL) + if (next == NULL) { + /* It was freed, move forward */ drbr_advance(ifp, txr->br); - else + } else { + /* + * Still have one left, it may not be + * the same since the transmit function + * may have changed it. + */ drbr_putback(ifp, txr->br, next); + } break; } drbr_advance(ifp, txr->br); @@ -959,31 +1080,13 @@ em_mq_start_locked(struct ifnet *ifp, st if (txr->tx_avail < EM_MAX_SCATTER) em_txeof(txr); - if (txr->tx_avail < EM_MAX_SCATTER) + if (txr->tx_avail < EM_MAX_SCATTER) { ifp->if_drv_flags |= IFF_DRV_OACTIVE; + } return (err); } /* -** Multiqueue capable stack interface -*/ -static int -em_mq_start(struct ifnet *ifp, struct mbuf *m) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - int error; - - if (EM_TX_TRYLOCK(txr)) { - error = em_mq_start_locked(ifp, txr, m); - EM_TX_UNLOCK(txr); - } else - error = drbr_enqueue(ifp, txr->br, m); - - return (error); -} - -/* ** Flush all ring buffers */ static void @@ -1001,69 +1104,6 @@ em_qflush(struct ifnet *ifp) } if_qflush(ifp); } -#else /* !EM_MULTIQUEUE */ - -static void -em_start_locked(struct ifnet *ifp, struct tx_ring *txr) -{ - struct adapter *adapter = ifp->if_softc; - struct mbuf *m_head; - - EM_TX_LOCK_ASSERT(txr); - - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - return; - - if (!adapter->link_active) - return; - - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - /* Call cleanup if number of TX descriptors low */ - if (txr->tx_avail <= EM_TX_CLEANUP_THRESHOLD) - em_txeof(txr); - if (txr->tx_avail < EM_MAX_SCATTER) { - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - break; - } - IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); - if (m_head == NULL) - break; - /* - * Encapsulation can modify our pointer, and or make it - * NULL on failure. In that event, we can't requeue. - */ - if (em_xmit(txr, &m_head)) { - if (m_head == NULL) - break; - IFQ_DRV_PREPEND(&ifp->if_snd, m_head); - break; - } - - /* Mark the queue as having work */ - if (txr->busy == EM_TX_IDLE) - txr->busy = EM_TX_BUSY; - - /* Send a copy of the frame to the BPF listener */ - ETHER_BPF_MTAP(ifp, m_head); - } - - return; -} - -static void -em_start(struct ifnet *ifp) -{ - struct adapter *adapter = ifp->if_softc; - struct tx_ring *txr = adapter->tx_rings; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - EM_TX_LOCK(txr); - em_start_locked(ifp, txr); - EM_TX_UNLOCK(txr); - } - return; -} #endif /* EM_MULTIQUEUE */ /********************************************************************* @@ -1460,7 +1500,7 @@ em_poll(struct ifnet *ifp, enum poll_cmd em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr, NULL); + em_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); @@ -1527,14 +1567,14 @@ em_handle_que(void *context, int pending struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { bool more = em_rxeof(rxr, adapter->rx_process_limit, NULL); + EM_TX_LOCK(txr); em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr, NULL); + em_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); @@ -1568,11 +1608,12 @@ em_msix_tx(void *arg) em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr, NULL); + em_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); #endif + /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, txr->ims); EM_TX_UNLOCK(txr); @@ -1598,9 +1639,10 @@ em_msix_rx(void *arg) more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); - else + else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); + } return; } @@ -1627,6 +1669,16 @@ em_msix_link(void *arg) } else E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); + /* + ** Because we must read the ICR for this interrupt + ** it may clear other causes using autoclear, for + ** this reason we simply create a soft interrupt + ** for all these vectors. + */ + if (reg_icr) { + E1000_WRITE_REG(&adapter->hw, + E1000_ICS, adapter->ims); + } return; } @@ -1640,9 +1692,10 @@ em_handle_rx(void *context, int pending) more = em_rxeof(rxr, adapter->rx_process_limit, NULL); if (more) taskqueue_enqueue(rxr->tq, &rxr->rx_task); - else + else { /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_IMS, rxr->ims); + } } static void @@ -1656,7 +1709,7 @@ em_handle_tx(void *context, int pending) em_txeof(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr, NULL); + em_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); @@ -1686,7 +1739,7 @@ em_handle_link(void *context, int pendin EM_TX_LOCK(txr); #ifdef EM_MULTIQUEUE if (!drbr_empty(ifp, txr->br)) - em_mq_start_locked(ifp, txr, NULL); + em_mq_start_locked(ifp, txr); #else if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp, txr); @@ -2269,7 +2322,7 @@ em_local_timer(void *arg) struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; - u32 trigger; + u32 trigger = 0; EM_CORE_LOCK_ASSERT(adapter); @@ -2282,9 +2335,11 @@ em_local_timer(void *arg) e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); /* Mask to use in the irq trigger */ - if (adapter->msix_mem) - trigger = rxr->ims; - else + if (adapter->msix_mem) { + for (int i = 0; i < adapter->num_queues; i++, rxr++) + trigger |= rxr->ims; + rxr = adapter->rx_rings; + } else trigger = E1000_ICS_RXDMT0; /* @@ -2293,7 +2348,6 @@ em_local_timer(void *arg) ** and the HUNG state will be static if set. */ for (int i = 0; i < adapter->num_queues; i++, txr++) { - /* Last cycle a queue was declared hung */ if (txr->busy == EM_TX_HUNG) goto hung; if (txr->busy >= EM_TX_MAXTRIES) @@ -2311,14 +2365,9 @@ em_local_timer(void *arg) return; hung: /* Looks like we're hung */ - device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); - device_printf(adapter->dev, - "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, - E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), - E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); - device_printf(adapter->dev,"TX(%d) desc avail = %d," - "Next TX to Clean = %d\n", - txr->me, txr->tx_avail, txr->next_to_clean); + device_printf(adapter->dev, "Watchdog timeout Queue[%d]-- resetting\n", + txr->me); + em_print_debug_info(adapter); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; em_init_locked(adapter); @@ -2370,7 +2419,7 @@ em_update_link_status(struct adapter *ad (hw->mac.type == e1000_82572))) { int tarc0; tarc0 = E1000_READ_REG(hw, E1000_TARC(0)); - tarc0 &= ~SPEED_MODE_BIT; + tarc0 &= ~TARC_SPEED_MODE_BIT; E1000_WRITE_REG(hw, E1000_TARC(0), tarc0); } if (bootverbose) @@ -2486,14 +2535,6 @@ em_allocate_pci_resources(struct adapter rman_get_bushandle(adapter->memory); adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; - /* Default to a single queue */ - adapter->num_queues = 1; - - /* - * Setup MSI/X or MSI if PCI Express - */ - adapter->msix = em_setup_msix(adapter); - adapter->hw.back = &adapter->osdep; return (0); @@ -2568,13 +2609,14 @@ em_allocate_msix(struct adapter *adapter struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; int error, rid, vector = 0; + int cpu_id = 0; /* Make sure all interrupts are disabled */ E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); /* First set up ring resources */ - for (int i = 0; i < adapter->num_queues; i++, txr++, rxr++) { + for (int i = 0; i < adapter->num_queues; i++, rxr++, vector++) { /* RX ring */ rid = vector + 1; @@ -2594,14 +2636,20 @@ em_allocate_msix(struct adapter *adapter return (error); } #if __FreeBSD_version >= 800504 - bus_describe_intr(dev, rxr->res, rxr->tag, "rx %d", i); + bus_describe_intr(dev, rxr->res, rxr->tag, "rx%d", i); #endif - rxr->msix = vector++; /* NOTE increment vector for TX */ + rxr->msix = vector; + + if (em_last_bind_cpu < 0) + em_last_bind_cpu = CPU_FIRST(); + cpu_id = em_last_bind_cpu; + bus_bind_intr(dev, rxr->res, cpu_id); + TASK_INIT(&rxr->rx_task, 0, em_handle_rx, rxr); rxr->tq = taskqueue_create_fast("em_rxq", M_NOWAIT, taskqueue_thread_enqueue, &rxr->tq); - taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq", - device_get_nameunit(adapter->dev)); + taskqueue_start_threads(&rxr->tq, 1, PI_NET, "%s rxq (cpuid %d)", + device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 20 and 21 @@ -2609,8 +2657,13 @@ em_allocate_msix(struct adapter *adapter ** NOTHING to do with the MSIX vector */ rxr->ims = 1 << (20 + i); + adapter->ims |= rxr->ims; adapter->ivars |= (8 | rxr->msix) << (i * 4); + em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); + } + + for (int i = 0; i < adapter->num_queues; i++, txr++, vector++) { /* TX ring */ rid = vector + 1; txr->res = bus_alloc_resource_any(dev, @@ -2628,14 +2681,20 @@ em_allocate_msix(struct adapter *adapter return (error); } #if __FreeBSD_version >= 800504 - bus_describe_intr(dev, txr->res, txr->tag, "tx %d", i); + bus_describe_intr(dev, txr->res, txr->tag, "tx%d", i); #endif - txr->msix = vector++; /* Increment vector for next pass */ + txr->msix = vector; + + if (em_last_bind_cpu < 0) + em_last_bind_cpu = CPU_FIRST(); + cpu_id = em_last_bind_cpu; + bus_bind_intr(dev, txr->res, cpu_id); + TASK_INIT(&txr->tx_task, 0, em_handle_tx, txr); txr->tq = taskqueue_create_fast("em_txq", M_NOWAIT, taskqueue_thread_enqueue, &txr->tq); - taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq", - device_get_nameunit(adapter->dev)); + taskqueue_start_threads(&txr->tq, 1, PI_NET, "%s txq (cpuid %d)", + device_get_nameunit(adapter->dev), cpu_id); /* ** Set the bit to enable interrupt ** in E1000_IMS -- bits 22 and 23 @@ -2643,13 +2702,16 @@ em_allocate_msix(struct adapter *adapter ** NOTHING to do with the MSIX vector */ txr->ims = 1 << (22 + i); + adapter->ims |= txr->ims; adapter->ivars |= (8 | txr->msix) << (8 + (i * 4)); + + em_last_bind_cpu = CPU_NEXT(em_last_bind_cpu); } /* Link interrupt */ - ++rid; + rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, - SYS_RES_IRQ, &rid, RF_ACTIVE); + SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev,"Unable to allocate " "bus resource: Link interrupt [%d]\n", rid); @@ -2665,7 +2727,7 @@ em_allocate_msix(struct adapter *adapter return (error); } #if __FreeBSD_version >= 800504 - bus_describe_intr(dev, adapter->res, adapter->tag, "link"); + bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->linkvec = vector; adapter->ivars |= (8 | vector) << 16; @@ -2689,9 +2751,8 @@ em_free_pci_resources(struct adapter *ad */ for (int i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_rings[i]; - rxr = &adapter->rx_rings[i]; /* an early abort? */ - if ((txr == NULL) || (rxr == NULL)) + if (txr == NULL) break; rid = txr->msix +1; if (txr->tag != NULL) { @@ -2701,6 +2762,11 @@ em_free_pci_resources(struct adapter *ad if (txr->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res); + + rxr = &adapter->rx_rings[i]; + /* an early abort? */ + if (rxr == NULL) + break; rid = rxr->msix +1; if (rxr->tag != NULL) { bus_teardown_intr(dev, rxr->res, rxr->tag); @@ -2750,14 +2816,19 @@ em_setup_msix(struct adapter *adapter) device_t dev = adapter->dev; int val; + /* Nearly always going to use one queue */ + adapter->num_queues = 1; + /* - ** Setup MSI/X for Hartwell: tests have shown - ** use of two queues to be unstable, and to - ** provide no great gain anyway, so we simply - ** seperate the interrupts and use a single queue. + ** Try using MSI-X for Hartwell adapters */ if ((adapter->hw.mac.type == e1000_82574) && (em_enable_msix == TRUE)) { +#ifdef EM_MULTIQUEUE + adapter->num_queues = (em_num_queues == 1) ? 1 : 2; + if (adapter->num_queues > 1) + em_enable_vectors_82574(adapter); +#endif /* Map the MSIX BAR */ int rid = PCIR_BAR(EM_MSIX_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, @@ -2769,16 +2840,34 @@ em_setup_msix(struct adapter *adapter) goto msi; } val = pci_msix_count(dev); - /* We only need/want 3 vectors */ - if (val >= 3) - val = 3; - else { - device_printf(adapter->dev, - "MSIX: insufficient vectors, using MSI\n"); - goto msi; + +#ifdef EM_MULTIQUEUE + /* We need 5 vectors in the multiqueue case */ + if (adapter->num_queues > 1 ) { + if (val >= 5) + val = 5; + else { + adapter->num_queues = 1; + device_printf(adapter->dev, + "Insufficient MSIX vectors for >1 queue, " + "using single queue...\n"); + goto msix_one; + } + } else { +msix_one: +#endif + if (val >= 3) + val = 3; + else { + device_printf(adapter->dev, + "Insufficient MSIX vectors, using MSI\n"); + goto msi; + } +#ifdef EM_MULTIQUEUE } +#endif - if ((pci_alloc_msix(dev, &val) == 0) && (val == 3)) { + if ((pci_alloc_msix(dev, &val) == 0)) { device_printf(adapter->dev, "Using MSIX interrupts " "with %d vectors\n", val); @@ -2799,7 +2888,7 @@ msi: } val = 1; if (pci_alloc_msi(dev, &val) == 0) { - device_printf(adapter->dev,"Using an MSI interrupt\n"); + device_printf(adapter->dev, "Using an MSI interrupt\n"); return (val); } /* Should only happen due to manual configuration */ @@ -3441,7 +3530,7 @@ em_initialize_transmit_unit(struct adapt { struct tx_ring *txr = adapter->tx_rings; struct e1000_hw *hw = &adapter->hw; - u32 tctl, tarc, tipg = 0; + u32 tctl, txdctl = 0, tarc, tipg = 0; INIT_DEBUGOUT("em_initialize_transmit_unit: begin"); @@ -3463,6 +3552,15 @@ em_initialize_transmit_unit(struct adapt E1000_READ_REG(&adapter->hw, E1000_TDLEN(i))); txr->busy = EM_TX_IDLE; + txdctl = 0; /* clear txdctl */ + txdctl |= 0x1f; /* PTHRESH */ + txdctl |= 1 << 8; /* HTHRESH */ + txdctl |= 1 << 16;/* WTHRESH */ + txdctl |= 1 << 22; /* Reserved bit 22 must always be 1 */ + txdctl |= E1000_TXDCTL_GRAN; + txdctl |= 1 << 25; /* LWTHRESH */ + + E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); } /* Set the default values for the Tx Inter Packet Gap timer */ @@ -3493,15 +3591,25 @@ em_initialize_transmit_unit(struct adapt if ((adapter->hw.mac.type == e1000_82571) || (adapter->hw.mac.type == e1000_82572)) { tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); - tarc |= SPEED_MODE_BIT; + tarc |= TARC_SPEED_MODE_BIT; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } else if (adapter->hw.mac.type == e1000_80003es2lan) { + /* errata: program both queues to unweighted RR */ tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(1)); tarc |= 1; E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); + } else if (adapter->hw.mac.type == e1000_82574) { + tarc = E1000_READ_REG(&adapter->hw, E1000_TARC(0)); + tarc |= TARC_ERRATA_BIT; + if ( adapter->num_queues > 1) { + tarc |= (TARC_COMPENSATION_MODE | TARC_MQ_FIX); + E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); + E1000_WRITE_REG(&adapter->hw, E1000_TARC(1), tarc); + } else + E1000_WRITE_REG(&adapter->hw, E1000_TARC(0), tarc); } adapter->txd_cmd = E1000_TXD_CMD_IFCS; @@ -3932,8 +4040,9 @@ em_txeof(struct tx_ring *txr) * TX lock which, with a single queue, guarantees * sanity. */ - if (txr->tx_avail >= EM_MAX_SCATTER) + if (txr->tx_avail >= EM_MAX_SCATTER) { ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + } /* Disable hang detection if all clean */ if (txr->tx_avail == adapter->num_tx_desc) @@ -4305,6 +4414,9 @@ em_initialize_receive_unit(struct adapte E1000_WRITE_REG(&adapter->hw, E1000_RADV, adapter->rx_abs_int_delay.value); + + E1000_WRITE_REG(&adapter->hw, E1000_RDTR, + adapter->rx_int_delay.value); /* * Set the interrupt throttling rate. Value is calculated * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns) @@ -4316,20 +4428,65 @@ em_initialize_receive_unit(struct adapte ** using the EITR register (82574 only) */ if (hw->mac.type == e1000_82574) { + u32 rfctl; for (int i = 0; i < 4; i++) E1000_WRITE_REG(hw, E1000_EITR_82574(i), DEFAULT_ITR); /* Disable accelerated acknowledge */ - E1000_WRITE_REG(hw, E1000_RFCTL, E1000_RFCTL_ACK_DIS); + rfctl = E1000_READ_REG(hw, E1000_RFCTL); + rfctl |= E1000_RFCTL_ACK_DIS; + E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); } rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); - if (ifp->if_capenable & IFCAP_RXCSUM) + if (ifp->if_capenable & IFCAP_RXCSUM) { +#ifdef EM_MULTIQUEUE + rxcsum |= E1000_RXCSUM_TUOFL | + E1000_RXCSUM_IPOFL | + E1000_RXCSUM_PCSD; +#else rxcsum |= E1000_RXCSUM_TUOFL; - else +#endif + } else rxcsum &= ~E1000_RXCSUM_TUOFL; + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); +#ifdef EM_MULTIQUEUE + if (adapter->num_queues > 1) { + uint32_t rss_key[10]; + uint32_t reta; + int i; + + /* + * Configure RSS key + */ + arc4rand(rss_key, sizeof(rss_key), 0); + for (i = 0; i < 10; ++i) + E1000_WRITE_REG_ARRAY(hw,E1000_RSSRK(0), i, rss_key[i]); + + /* + * Configure RSS redirect table in following fashion: + * (hash & ring_cnt_mask) == rdr_table[(hash & rdr_table_mask)] + */ + reta = 0; + for (i = 0; i < 4; ++i) { + uint32_t q; + q = (i % adapter->num_queues) << 7; + reta |= q << (8 * i); + } + for (i = 0; i < 32; ++i) + E1000_WRITE_REG(hw, E1000_RETA(i), reta); + + E1000_WRITE_REG(hw, E1000_MRQC, E1000_MRQC_RSS_ENABLE_2Q | + E1000_MRQC_RSS_FIELD_IPV4_TCP | + E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX | + E1000_MRQC_RSS_FIELD_IPV6_EX | + E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP); + } +#endif /* ** XXX TEMPORARY WORKAROUND: on some systems with 82573 ** long latencies are observed, like Lenovo X60. This @@ -4362,13 +4519,30 @@ em_initialize_receive_unit(struct adapte E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } - /* Set PTHRESH for improved jumbo performance */ + /* + * Set PTHRESH for improved jumbo performance + * According to 10.2.5.11 of Intel 82574 Datasheet, + * RXDCTL(1) is written whenever RXDCTL(0) is written. + * Only write to RXDCTL(1) if there is a need for different + * settings. + */ if (((adapter->hw.mac.type == e1000_ich9lan) || (adapter->hw.mac.type == e1000_pch2lan) || (adapter->hw.mac.type == e1000_ich10lan)) && (ifp->if_mtu > ETHERMTU)) { u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(0)); E1000_WRITE_REG(hw, E1000_RXDCTL(0), rxdctl | 3); + } else if ((adapter->hw.mac.type == e1000_82574) && + (ifp->if_mtu > ETHERMTU)) { + for (int i = 0; i < adapter->num_queues; i++) { + u32 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); + + rxdctl |= 0x20; /* PTHRESH */ + rxdctl |= 4 << 8; /* HTHRESH */ + rxdctl |= 4 << 16;/* WTHRESH */ + rxdctl |= 1 << 24; /* Switch to granularity */ + E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); + } } *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201506171850.t5HIow3k064693>