Date: Wed, 03 Oct 2007 14:45:39 -0700 From: LI Xin <delphij@delphij.net> To: Vladimir Ivanov <wawa@yandex-team.ru> Cc: "freebsd-net@freebsd.org" <freebsd-net@FreeBSD.org>, Jack Vogel <jfvogel@gmail.com> Subject: Re: SMPable version of EM driver Message-ID: <47040D83.9010706@delphij.net> In-Reply-To: <47037246.2070400@yandex-team.ru> References: <46B07931.3080300@yandex-team.ru> <2a41acea0708010923m7b21095ajc2ee84c37e0d5354@mail.gmail.com> <470280F6.9070009@yandex-team.ru> <20071003111737.U14276@delplex.bde.org> <47037246.2070400@yandex-team.ru>
next in thread | previous in thread | raw e-mail | index | archive | help
This is an OpenPGP/MIME signed message (RFC 2440 and 3156) --------------enig19778564B8230D4BDB26AB74 Content-Type: multipart/mixed; boundary="------------060708020602090500090901" This is a multi-part message in MIME format. --------------060708020602090500090901 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable Hi Vladimir and Jack, I have ported Vladimir's 1.16 revision of their driver to -CURRENT code as of today, but I don't have a box that is suitable for testing right now as I just moved, and the server I used to do FreeBSD coding stuff is located several thousand miles away :-) I hope that this would be useful for adoption into the official em(4) driver, and thanks to Vladimir and Yandex for their work on this. Cheers, --=20 Xin LI <delphij@delphij.net> http://www.delphij.net/ FreeBSD - The Power to Serve! --------------060708020602090500090901 Content-Type: text/plain; x-mac-type="0"; x-mac-creator="0"; name="em.diff" Content-Transfer-Encoding: quoted-printable Content-Disposition: inline; filename="em.diff" Index: e1000_defines.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /home/ncvs/src/sys/dev/em/e1000_defines.h,v retrieving revision 1.3 diff -u -p -r1.3 e1000_defines.h --- e1000_defines.h 16 May 2007 00:14:23 -0000 1.3 +++ e1000_defines.h 3 Oct 2007 21:36:07 -0000 @@ -746,7 +746,6 @@ */ #define IMS_ENABLE_MASK ( \ E1000_IMS_RXT0 | \ - E1000_IMS_TXDW | \ E1000_IMS_RXDMT0 | \ E1000_IMS_RXSEQ | \ E1000_IMS_LSC) Index: if_em.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /home/ncvs/src/sys/dev/em/if_em.c,v retrieving revision 1.184 diff -u -p -r1.184 if_em.c --- if_em.c 10 Sep 2007 
21:50:40 -0000 1.184 +++ if_em.c 3 Oct 2007 21:41:12 -0000 @@ -240,14 +240,16 @@ static void em_initialize_transmit_unit( static int em_setup_receive_structures(struct adapter *); static void em_initialize_receive_unit(struct adapter *); static void em_enable_intr(struct adapter *); +static void em_enable_intr_rx(struct adapter *); static void em_disable_intr(struct adapter *); +static void em_disable_intr_rx(struct adapter *); static void em_free_transmit_structures(struct adapter *); static void em_free_receive_structures(struct adapter *); static void em_update_stats_counters(struct adapter *); static void em_txeof(struct adapter *); static int em_allocate_receive_structures(struct adapter *); static int em_allocate_transmit_structures(struct adapter *); -static int em_rxeof(struct adapter *, int); +static int em_rxeof(struct adapter *, int, int); #ifndef __NO_STRICT_ALIGNMENT static int em_fixup_rx(struct adapter *); #endif @@ -292,14 +294,19 @@ static void em_get_hw_control(struct static void em_release_hw_control(struct adapter *); static void em_enable_wakeup(device_t); =20 + +/* + * Fast interrupt handler and legacy ithread/polling modes are + * mutually exclusive. 
+ */ #ifdef DEVICE_POLLING static poll_handler_t em_poll; static void em_intr(void *); #else +static void em_add_int_rx_kthread_priority(struct adapter *, const char = *, + const char *, int *, int); static int em_intr_fast(void *); -static void em_add_rx_process_limit(struct adapter *, const char *, - const char *, int *, int); -static void em_handle_rxtx(void *context, int pending); +static void em_kthread_rx(void *arg); static void em_handle_link(void *context, int pending); #endif =20 @@ -351,9 +358,8 @@ TUNABLE_INT("hw.em.rxd", &em_rxd); TUNABLE_INT("hw.em.txd", &em_txd); TUNABLE_INT("hw.em.smart_pwr_down", &em_smart_pwr_down); #ifndef DEVICE_POLLING -/* How many packets rxeof tries to clean at a time */ -static int em_rx_process_limit =3D 100; -TUNABLE_INT("hw.em.rx_process_limit", &em_rx_process_limit); +static int em_rx_kthread_priority =3D PRI_MAX_KERN; +TUNABLE_INT("hw.em.rx_kthread_priority", &em_rx_kthread_priority); #endif /* Global used in WOL setup with multiport cards */ static int global_quad_port_a =3D 0; @@ -370,7 +376,7 @@ static int global_quad_port_a =3D 0; static int em_probe(device_t dev) { - char adapter_name[60]; + char adapter_name[1024]; /* XXX why? 
*/ uint16_t pci_vendor_id =3D 0; uint16_t pci_device_id =3D 0; uint16_t pci_subvendor_id =3D 0; @@ -431,7 +437,8 @@ em_attach(device_t dev) =20 adapter =3D device_get_softc(dev); adapter->dev =3D adapter->osdep.dev =3D dev; - EM_LOCK_INIT(adapter, device_get_nameunit(dev)); + EM_RXLOCK_INIT(adapter, device_get_nameunit(dev)); + EM_TXLOCK_INIT(adapter, device_get_nameunit(dev)); =20 /* SYSCTL stuff */ SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), @@ -444,8 +451,8 @@ em_attach(device_t dev) OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0, em_sysctl_stats, "I", "Statistics"); =20 - callout_init_mtx(&adapter->timer, &adapter->mtx, 0); - callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0); + callout_init_mtx(&adapter->timer, &adapter->txmtx, 0); + callout_init_mtx(&adapter->tx_fifo_timer, &adapter->txmtx, 0); =20 /* Determine hardware and mac info */ em_identify_hardware(adapter); @@ -506,10 +513,10 @@ em_attach(device_t dev) } =20 #ifndef DEVICE_POLLING - /* Sysctls for limiting the amount of work done in the taskqueue */ - em_add_rx_process_limit(adapter, "rx_processing_limit", - "max number of rx packets to process", &adapter->rx_process_limit, - em_rx_process_limit); + /* Sysctls for set the RX kthreads' priority */ + em_add_int_rx_kthread_priority(adapter, "rx_kthread_priority", + "priority of RX handler kthread", &adapter->rx_kthread_priority, + em_rx_kthread_priority); #endif =20 /* @@ -517,25 +524,14 @@ em_attach(device_t dev) * must not exceed hardware maximum, and must be multiple * of E1000_DBA_ALIGN. 
*/ - if (((em_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) !=3D 0 || - (adapter->hw.mac.type >=3D e1000_82544 && em_txd > EM_MAX_TXD) || - (adapter->hw.mac.type < e1000_82544 && em_txd > EM_MAX_TXD_82543) |= | - (em_txd < EM_MIN_TXD)) { - device_printf(dev, "Using %d TX descriptors instead of %d!\n", - EM_DEFAULT_TXD, em_txd); - adapter->num_tx_desc =3D EM_DEFAULT_TXD; - } else - adapter->num_tx_desc =3D em_txd; - if (((em_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) !=3D 0 || - (adapter->hw.mac.type >=3D e1000_82544 && em_rxd > EM_MAX_RXD) || - (adapter->hw.mac.type < e1000_82544 && em_rxd > EM_MAX_RXD_82543) |= | - (em_rxd < EM_MIN_RXD)) { - device_printf(dev, "Using %d RX descriptors instead of %d!\n", - EM_DEFAULT_RXD, em_rxd); - adapter->num_rx_desc =3D EM_DEFAULT_RXD; - } else - adapter->num_rx_desc =3D em_rxd; - + if (adapter->hw.mac.type >=3D e1000_82544) { + adapter->num_tx_desc =3D EM_MAX_TXD; + adapter->num_rx_desc =3D EM_MAX_RXD; + } else { + adapter->num_tx_desc =3D EM_MAX_TXD_82543; + adapter->num_rx_desc =3D EM_MAX_RXD_82543; + } +=09 adapter->hw.mac.autoneg =3D DO_AUTO_NEG; adapter->hw.phy.wait_for_link =3D FALSE; adapter->hw.phy.autoneg_advertised =3D AUTONEG_ADV_DEFAULT; @@ -736,7 +732,9 @@ err_tx_desc: err_pci: em_free_intr(adapter); em_free_pci_resources(adapter); - EM_LOCK_DESTROY(adapter); + /* XXX */ + EM_TXLOCK_DESTROY(adapter); + EM_RXLOCK_DESTROY(adapter); =20 return (error); } @@ -766,7 +764,8 @@ em_detach(device_t dev) =20 em_disable_intr(adapter); em_free_intr(adapter); - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); adapter->in_detach =3D 1; em_stop(adapter); e1000_phy_hw_reset(&adapter->hw); @@ -785,7 +784,8 @@ em_detach(device_t dev) em_enable_wakeup(dev); } =20 - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); ether_ifdetach(adapter->ifp); =20 callout_drain(&adapter->timer); @@ -811,7 +811,8 @@ em_detach(device_t dev) adapter->rx_desc_base =3D NULL; } =20 - EM_LOCK_DESTROY(adapter); 
+ EM_TXLOCK_DESTROY(adapter); + EM_RXLOCK_DESTROY(adapter); =20 return (0); } @@ -836,7 +837,8 @@ em_suspend(device_t dev) { struct adapter *adapter =3D device_get_softc(dev); =20 - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); em_stop(adapter); =20 em_release_manageability(adapter); @@ -853,7 +855,8 @@ em_suspend(device_t dev) em_enable_wakeup(dev); } =20 - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); =20 return bus_generic_suspend(dev); } @@ -864,7 +867,8 @@ em_resume(device_t dev) struct adapter *adapter =3D device_get_softc(dev); struct ifnet *ifp =3D adapter->ifp; =20 - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); em_init_locked(adapter); em_init_manageability(adapter); =20 @@ -872,7 +876,8 @@ em_resume(device_t dev) (ifp->if_drv_flags & IFF_DRV_RUNNING)) em_start_locked(ifp); =20 - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); =20 return bus_generic_resume(dev); } @@ -894,7 +899,7 @@ em_start_locked(struct ifnet *ifp) struct adapter *adapter =3D ifp->if_softc; struct mbuf *m_head; =20 - EM_LOCK_ASSERT(adapter); + EM_TXLOCK_ASSERT(adapter); =20 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=3D IFF_DRV_RUNNING) @@ -906,7 +911,7 @@ em_start_locked(struct ifnet *ifp) =20 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head =3D=3D NULL) - break; + continue; /* * Encapsulation can modify our pointer, and or make it * NULL on failure. In that event, we can't requeue. @@ -926,7 +931,12 @@ em_start_locked(struct ifnet *ifp) ETHER_BPF_MTAP(ifp, m_head); =20 /* Set timeout in case hardware has problems transmitting. 
*/ - adapter->watchdog_timer =3D EM_TX_TIMEOUT; + adapter->tx_counter ++; + } + + if (adapter->num_tx_desc - adapter->num_tx_desc_avail > 32) { + /* it's time to clean a little bit */ + em_txeof (adapter); } } =20 @@ -935,10 +945,10 @@ em_start(struct ifnet *ifp) { struct adapter *adapter =3D ifp->if_softc; =20 - EM_LOCK(adapter); + EM_TXLOCK(adapter); if (ifp->if_drv_flags & IFF_DRV_RUNNING) em_start_locked(ifp); - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); } =20 /********************************************************************* @@ -973,9 +983,11 @@ em_ioctl(struct ifnet *ifp, u_long comma */ ifp->if_flags |=3D IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); em_init_locked(adapter); - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } arp_ifinit(ifp, ifa); } else @@ -988,7 +1000,8 @@ em_ioctl(struct ifnet *ifp, u_long comma =20 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)"); =20 - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); switch (adapter->hw.mac.type) { case e1000_82573: /* @@ -1019,7 +1032,8 @@ em_ioctl(struct ifnet *ifp, u_long comma } if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN - ETHER_CRC_LEN) { - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); error =3D EINVAL; break; } @@ -1028,13 +1042,15 @@ em_ioctl(struct ifnet *ifp, u_long comma adapter->hw.mac.max_frame_size =3D ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; em_init_locked(adapter); - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); break; } case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl rcv'd:\ SIOCSIFFLAGS (Set Interface Flags)"); - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & @@ -1048,13 +1064,15 @@ em_ioctl(struct ifnet *ifp, u_long comma if (ifp->if_drv_flags & IFF_DRV_RUNNING) em_stop(adapter); 
adapter->if_flags =3D ifp->if_flags; - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); em_disable_intr(adapter); em_set_multi(adapter); if (adapter->hw.mac.type =3D=3D e1000_82542 &&=20 @@ -1065,19 +1083,23 @@ em_ioctl(struct ifnet *ifp, u_long comma if (!(ifp->if_capenable & IFCAP_POLLING)) #endif em_enable_intr(adapter); - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } break; case SIOCSIFMEDIA: /* Check SOL/IDER usage */ - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); if (e1000_check_reset_block(&adapter->hw)) { - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); device_printf(adapter->dev, "Media change is" " blocked due to SOL/IDER session.\n"); break; } - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl rcv'd: \ SIOCxIFMEDIA (Get/Set Interface Media)"); @@ -1096,17 +1118,21 @@ em_ioctl(struct ifnet *ifp, u_long comma error =3D ether_poll_register(em_poll, ifp); if (error) return (error); - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); em_disable_intr(adapter); ifp->if_capenable |=3D IFCAP_POLLING; - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } else { error =3D ether_poll_deregister(ifp); /* Enable interrupt even in error case */ - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); em_enable_intr(adapter); ifp->if_capenable &=3D ~IFCAP_POLLING; - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } } #endif @@ -1149,29 +1175,49 @@ static void em_watchdog(struct adapter *adapter) { =20 - EM_LOCK_ASSERT(adapter); + EM_TXLOCK_ASSERT(adapter); =20 - /* - ** The timer is set to 5 every time start queues a packet. 
- ** Then txeof keeps resetting to 5 as long as it cleans at - ** least one descriptor. - ** Finally, anytime all descriptors are clean the timer is - ** set to 0. - */ - if (adapter->watchdog_timer =3D=3D 0 || --adapter->watchdog_timer) - return; + if (E1000_READ_REG(&adapter->hw, E1000_TDH) =3D=3D + E1000_READ_REG(&adapter->hw, E1000_TDT)) { + /* TX queue is clean. Nothing to wait */ + adapter->tx_counter_watchdog_mark =3D 0; + } =20 /* If we are in this routine because of pause frames, then * don't reset the hardware. */ if (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_TXOFF) { - adapter->watchdog_timer =3D EM_TX_TIMEOUT; + /* XOFF received */ + adapter->tx_counter_watchdog_mark =3D 0; + return; + } + + if (!adapter->tx_counter_watchdog_mark) { + /* watchdog isn't started yet, let's do it */ + adapter->tx_counter_watchdog_mark =3D adapter->tx_counter; + adapter->tx_tdh_watchdog_mark =3D E1000_READ_REG(&adapter->hw, E1000_T= DH); + return; + } + + if (adapter->tx_counter - adapter->tx_counter_watchdog_mark >=3D adapte= r->num_tx_desc) { + /* TX ring has been wrapped, clean watchdog condition */ + adapter->tx_counter_watchdog_mark =3D 0; return; } =20 - if (e1000_check_for_link(&adapter->hw) =3D=3D 0) + if (adapter->tx_tdh_watchdog_mark !=3D E1000_READ_REG(&adapter->hw, E10= 00_TDH)) { + /* Something were sent */ + adapter->tx_counter_watchdog_mark =3D 0; + return; + } + + if (e1000_check_for_link(&adapter->hw) =3D=3D 0) { device_printf(adapter->dev, "watchdog timeout -- resetting\n"); + em_print_hw_stats(adapter); + em_print_debug_info(adapter); + } + adapter->ifp->if_drv_flags &=3D ~IFF_DRV_RUNNING; adapter->watchdog_events++; =20 @@ -1198,7 +1244,8 @@ em_init_locked(struct adapter *adapter) =20 INIT_DEBUGOUT("em_init: begin"); =20 - EM_LOCK_ASSERT(adapter); + EM_RXLOCK_ASSERT(adapter); + EM_TXLOCK_ASSERT(adapter); =20 em_stop(adapter); =20 @@ -1337,9 +1384,11 @@ em_init(void *arg) { struct adapter *adapter =3D arg; =20 - EM_LOCK(adapter); + 
EM_RXLOCK(adapter); + EM_TXLOCK(adapter); em_init_locked(adapter); - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } =20 =20 @@ -1355,9 +1404,11 @@ em_poll(struct ifnet *ifp, enum poll_cmd struct adapter *adapter =3D ifp->if_softc; uint32_t reg_icr; =20 - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) =3D=3D 0) { - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); return; } =20 @@ -1372,12 +1423,13 @@ em_poll(struct ifnet *ifp, enum poll_cmd em_local_timer, adapter); } } - em_rxeof(adapter, count); + em_rxeof(adapter, count, 0); em_txeof(adapter); =20 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) em_start_locked(ifp); - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } =20 /********************************************************************* @@ -1393,11 +1445,11 @@ em_intr(void *arg) struct ifnet *ifp; uint32_t reg_icr; =20 - EM_LOCK(adapter); + /* XXX EM_LOCK(adapter); */ ifp =3D adapter->ifp; =20 if (ifp->if_capenable & IFCAP_POLLING) { - EM_UNLOCK(adapter); + /* EM_UNLOCK(adapter); */ return; } =20 @@ -1419,29 +1471,35 @@ em_intr(void *arg) if (reg_icr =3D=3D 0xffffffff) break; =20 - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - em_rxeof(adapter, -1); - em_txeof(adapter); - } - /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); callout_stop(&adapter->timer); adapter->hw.mac.get_link_status =3D 1; e1000_check_for_link(&adapter->hw); em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); + } + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + if (reg_icr & (E1000_ICR_RXDMT0|E1000_ICR_RXO|E1000_ICR_RXT0)) { + EM_RXLOCK(adapter); + em_rxeof(adapter, -1,0); + EM_RXUNLOCK(adapter); + } + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + EM_TXLOCK(adapter); + em_start_locked(ifp); + EM_TXUNLOCK(adapter); + } } =20 if 
(reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; } - - if (ifp->if_drv_flags & IFF_DRV_RUNNING && - !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - em_start_locked(ifp); - EM_UNLOCK(adapter); } =20 #else /* if not DEVICE_POLLING, then fast interrupt routines only */ @@ -1454,9 +1512,11 @@ em_handle_link(void *context, int pendin =20 ifp =3D adapter->ifp; =20 - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); return; } =20 @@ -1465,33 +1525,37 @@ em_handle_link(void *context, int pendin e1000_check_for_link(&adapter->hw); em_update_link_status(adapter); callout_reset(&adapter->timer, hz, em_local_timer, adapter); - EM_UNLOCK(adapter); + + wakeup (&adapter->rxmtx); + wakeup (&adapter->txmtx); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } =20 static void -em_handle_rxtx(void *context, int pending) +em_kthread_rx(void *arg) { - struct adapter *adapter =3D context; - struct ifnet *ifp; + struct adapter *adapter =3D arg; + struct ifnet *ifp =3D adapter->ifp; + int myKthreadNo =3D 0; =20 - ifp =3D adapter->ifp; + EM_RXLOCK(adapter); + myKthreadNo =3D adapter -> rxKthreadNo ++; + adapter -> rxIpBeingProcessed[myKthreadNo] =3D 0; + adapter -> waitedBy[myKthreadNo] =3D 0; + EM_RXUNLOCK(adapter); =20 - /* - * TODO: - * It should be possible to run the tx clean loop without the lock. 
- */ - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - if (em_rxeof(adapter, adapter->rx_process_limit) !=3D 0) - taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); - EM_LOCK(adapter); - em_txeof(adapter); - - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - em_start_locked(ifp); - EM_UNLOCK(adapter); + while (!adapter->rx_shutdown_flag) { + tsleep(&adapter->rxmtx, adapter->rx_kthread_priority, "em_rx", hz); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + EM_RXLOCK(adapter); + em_rxeof(adapter,-1, myKthreadNo); + EM_RXUNLOCK(adapter); + } + em_enable_intr_rx(adapter); } =20 - em_enable_intr(adapter); + kthread_exit(0); } =20 /********************************************************************* @@ -1526,13 +1590,17 @@ em_intr_fast(void *arg) (reg_icr & E1000_ICR_INT_ASSERTED) =3D=3D 0) return (FILTER_STRAY); =20 - /* - * Mask interrupts until the taskqueue is finished running. This is - * cheap, just assume that it is needed. This also works around the - * MSI message reordering errata on certain systems. - */ - em_disable_intr(adapter); - taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + if (reg_icr & (E1000_ICR_RXDMT0|E1000_ICR_RXO|E1000_ICR_RXT0)) { + /* + * Mask interrupts until the taskqueue is finished running. This is + * cheap, just assume that it is needed. This also works around the + * MSI message reordering errata on certain systems. 
+ */ + em_disable_intr_rx (adapter); + wakeup (&adapter->rxmtx); + } + } =20 /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) @@ -1560,7 +1628,8 @@ em_media_status(struct ifnet *ifp, struc =20 INIT_DEBUGOUT("em_media_status: begin"); =20 - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); e1000_check_for_link(&adapter->hw); em_update_link_status(adapter); =20 @@ -1568,7 +1637,8 @@ em_media_status(struct ifnet *ifp, struc ifmr->ifm_active =3D IFM_ETHER; =20 if (!adapter->link_active) { - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); return; } =20 @@ -1596,7 +1666,8 @@ em_media_status(struct ifnet *ifp, struc else ifmr->ifm_active |=3D IFM_HDX; } - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } =20 /********************************************************************* @@ -1618,7 +1689,8 @@ em_media_change(struct ifnet *ifp) if (IFM_TYPE(ifm->ifm_media) !=3D IFM_ETHER) return (EINVAL); =20 - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: adapter->hw.mac.autoneg =3D DO_AUTO_NEG; @@ -1656,7 +1728,8 @@ em_media_change(struct ifnet *ifp) adapter->hw.phy.reset_disable =3D FALSE; =20 em_init_locked(adapter); - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); =20 return (0); } @@ -2130,7 +2203,8 @@ em_82547_move_tail(void *arg) uint16_t length =3D 0; boolean_t eop =3D 0; =20 - EM_LOCK_ASSERT(adapter); + EM_RXLOCK_ASSERT(adapter); + EM_TXLOCK_ASSERT(adapter); =20 hw_tdt =3D E1000_READ_REG(&adapter->hw, E1000_TDT); sw_tdt =3D adapter->next_avail_tx_desc; @@ -2337,7 +2411,8 @@ em_local_timer(void *arg) struct adapter *adapter =3D arg; struct ifnet *ifp =3D adapter->ifp; =20 - EM_LOCK_ASSERT(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); =20 e1000_check_for_link(&adapter->hw); em_update_link_status(adapter); @@ -2359,6 +2434,9 @@ em_local_timer(void *arg) em_watchdog(adapter); =20 
callout_reset(&adapter->timer, hz, em_local_timer, adapter); + + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); } =20 static void @@ -2419,7 +2497,8 @@ em_stop(void *arg) struct adapter *adapter =3D arg; struct ifnet *ifp =3D adapter->ifp; =20 - EM_LOCK_ASSERT(adapter); + EM_RXLOCK_ASSERT(adapter); + EM_TXLOCK_ASSERT(adapter); =20 INIT_DEBUGOUT("em_stop: begin"); =20 @@ -2606,19 +2685,22 @@ em_allocate_intr(struct adapter *adapter * Try allocating a fast interrupt and the associated deferred * processing contexts. */ - TASK_INIT(&adapter->rxtx_task, 0, em_handle_rxtx, adapter); - TASK_INIT(&adapter->link_task, 0, em_handle_link, adapter); - adapter->tq =3D taskqueue_create_fast("em_taskq", M_NOWAIT, - taskqueue_thread_enqueue, &adapter->tq); - taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq", - device_get_nameunit(adapter->dev)); + TASK_INIT(&adapter->link_task, INTR_TYPE_NET | INTR_MPSAFE, em_handle_l= ink, adapter); + + adapter->rx_shutdown_flag=3DFALSE; + adapter->rxKthreadNo=3D0; + adapter->reorder_cnt=3D0; + for (int i =3D 0; i < RX_KTHREADS_NUM; i++) { + adapter->rx_kthreads_handles[i] =3D NULL; + kthread_create (em_kthread_rx, adapter, adapter->rx_kthreads_handles += i,=20 + INTR_TYPE_NET | INTR_FAST | INTR_MPSAFE, 0, "%s_rx_kthread_%d",device= _get_nameunit(dev),i); + } + if ((error =3D bus_setup_intr(dev, adapter->res_interrupt, - INTR_TYPE_NET, em_intr_fast, NULL, adapter, + INTR_TYPE_NET | INTR_FAST | INTR_MPSAFE, em_intr_fast, NULL, adapte= r, &adapter->int_handler_tag)) !=3D 0) { device_printf(dev, "Failed to register fast interrupt " "handler: %d\n", error); - taskqueue_free(adapter->tq); - adapter->tq =3D NULL; return (error); } #endif=20 @@ -2637,11 +2719,12 @@ em_free_intr(struct adapter *adapter) adapter->int_handler_tag); adapter->int_handler_tag =3D NULL; } - if (adapter->tq !=3D NULL) { - taskqueue_drain(adapter->tq, &adapter->rxtx_task); - taskqueue_drain(taskqueue_fast, &adapter->link_task); - taskqueue_free(adapter->tq); - 
adapter->tq =3D NULL; + taskqueue_drain(taskqueue_fast, &adapter->link_task); + + adapter->rx_shutdown_flag=3DTRUE; + for (int i =3D 0; i < RX_KTHREADS_NUM; i++) { + if (adapter->rx_kthreads_handles[i]) + tsleep(adapter->rx_kthreads_handles[i], 0, "RXSTOP", 3*hz); } } =20 @@ -3138,7 +3221,7 @@ em_initialize_transmit_unit(struct adapt E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);= if(adapter->hw.mac.type >=3D e1000_82540) E1000_WRITE_REG(&adapter->hw, E1000_TADV, - adapter->tx_abs_int_delay.value); + EM_USECS_TO_TICKS(adapter->tx_abs_int_delay.value)); =20 if ((adapter->hw.mac.type =3D=3D e1000_82571) || (adapter->hw.mac.type =3D=3D e1000_82572)) { @@ -3364,6 +3447,10 @@ em_transmit_checksum_setup(struct adapte =20 adapter->num_tx_desc_avail--; adapter->next_avail_tx_desc =3D curr_txd; + + adapter->tx_counter=3D0; + adapter->tx_counter_watchdog_mark=3D0; + adapter->tx_tdh_watchdog_mark=3D0; } =20 /********************************************************************** @@ -3736,7 +3823,7 @@ em_txeof(struct adapter *adapter) struct e1000_tx_desc *tx_desc, *eop_desc; struct ifnet *ifp =3D adapter->ifp; =20 - EM_LOCK_ASSERT(adapter); + EM_TXLOCK_ASSERT(adapter); =20 if (adapter->num_tx_desc_avail =3D=3D adapter->num_tx_desc) return; @@ -3809,15 +3896,8 @@ em_txeof(struct adapter *adapter) * If there are no pending descriptors, clear the timeout. Other= wise, * if some descriptors have been freed, restart the timeout. 
*/ - if (num_avail > EM_TX_CLEANUP_THRESHOLD) { =20 + if (num_avail > EM_TX_CLEANUP_THRESHOLD) ifp->if_drv_flags &=3D ~IFF_DRV_OACTIVE; - /* All clean, turn off the timer */ - if (num_avail =3D=3D adapter->num_tx_desc) - adapter->watchdog_timer =3D 0; - /* Some cleaned, reset the timer */ - else if (num_avail !=3D adapter->num_tx_desc_avail) - adapter->watchdog_timer =3D EM_TX_TIMEOUT; - } adapter->num_tx_desc_avail =3D num_avail; return; } @@ -4144,7 +4224,7 @@ em_free_receive_structures(struct adapte * *********************************************************************/ static int -em_rxeof(struct adapter *adapter, int count) +em_rxeof(struct adapter *adapter, int count, int myKthreadNo) { struct ifnet *ifp; struct mbuf *mp; @@ -4298,15 +4378,57 @@ discard: if (++i =3D=3D adapter->num_rx_desc) i =3D 0; if (m !=3D NULL) { + struct ip *ip =3D mtod(m, struct ip *); + adapter->next_rx_desc_to_check =3D i; -#ifdef DEVICE_POLLING - EM_UNLOCK(adapter); - (*ifp->if_input)(ifp, m); - EM_LOCK(adapter); -#else - /* Already running unlocked */ + + /* + * Trick to avoid reorder: + * + * Don't allow change order of tcp packets + * in same session. In order to make this + * easier, we will not allow to process packets + * from one same source with more than one CPU. + */ + int hlen =3D ip->ip_hl << 2; + if (hlen >=3D sizeof(struct ip)) { /* minimum header length */ + adapter -> rxIpBeingProcessed[myKthreadNo]=3Dip->ip_src.s_addr; + + if (ip->ip_src.s_addr) + for (int k=3D0; k < RX_KTHREADS_NUM; k++) { + if ((adapter->rxIpBeingProcessed[k] =3D=3D ip->ip_src.s_addr)=20 + && !adapter->waitedBy[k]) { + /* + * Packet from the same IP is being processed + * by another thread, wait until that was done. 
+ */ + adapter->reorder_cnt++;=20 + adapter->waitedBy[k] =3D myKthreadNo; + msleep(adapter->rxIpBeingProcessed+k, + &adapter->rxmtx, + adapter->rx_kthread_priority, + "RORDER", -1); + } + } + } else=20 + ip =3D NULL; + + EM_RXUNLOCK(adapter); + (*ifp->if_input)(ifp, m); -#endif + + EM_RXLOCK(adapter); + + adapter->rxIpBeingProcessed[myKthreadNo]=3D0; + + if (adapter->waitedBy[myKthreadNo]) { + /* + * Wakeup threads blocking on our packet process + * procedure due to the reorder prevention check + */ + wakeup(adapter->rxIpBeingProcessed+myKthreadNo); + adapter->waitedBy[myKthreadNo] =3D 0; + } i =3D adapter->next_rx_desc_to_check; } current_desc =3D &adapter->rx_desc_base[i]; @@ -4438,6 +4560,18 @@ em_disable_intr(struct adapter *adapter) E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); } =20 +static void +em_enable_intr_rx(struct adapter *adapter) +{ + E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_RXT0 | E1000_IMS_RXD= MT0 | E1000_IMS_RXO); +} + +static void +em_disable_intr_rx(struct adapter *adapter) +{ + E1000_WRITE_REG(&adapter->hw, E1000_IMC, E1000_IMS_RXT0 | E1000_IMS_RXD= MT0 | E1000_IMS_RXO); +} + /* * Bit of a misnomer, what this really means is * to enable OS management of the system... aka @@ -4878,6 +5012,8 @@ em_print_debug_info(struct adapter *adap adapter->dropped_pkts); device_printf(dev, "Driver tx dma failure in encap =3D %ld\n", adapter->no_tx_dma_setup); + device_printf(dev, "Packets pended due to reorder =3D %ld\n", + adapter->reorder_cnt); } =20 static void @@ -4996,7 +5132,8 @@ em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) =20 adapter =3D info->adapter; =09 - EM_LOCK(adapter); + EM_RXLOCK(adapter); + EM_TXLOCK(adapter); regval =3D E1000_READ_OFFSET(&adapter->hw, info->offset); regval =3D (regval & ~0xffff) | (ticks & 0xffff); /* Handle a few special cases. 
*/ @@ -5014,7 +5151,8 @@ em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) break; } E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval); - EM_UNLOCK(adapter); + EM_TXUNLOCK(adapter); + EM_RXUNLOCK(adapter); return (0); } =20 @@ -5034,7 +5172,7 @@ em_add_int_delay_sysctl(struct adapter * =20 #ifndef DEVICE_POLLING static void -em_add_rx_process_limit(struct adapter *adapter, const char *name, +em_add_int_rx_kthread_priority(struct adapter *adapter, const char *name= , const char *description, int *limit, int value) { *limit =3D value; Index: if_em.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D RCS file: /home/ncvs/src/sys/dev/em/if_em.h,v retrieving revision 1.62 diff -u -p -r1.62 if_em.h --- if_em.h 10 Sep 2007 21:50:40 -0000 1.62 +++ if_em.h 3 Oct 2007 21:35:44 -0000 @@ -82,7 +82,7 @@ POSSIBILITY OF SUCH DAMAGE. * system is reporting dropped transmits, this value may be set too hi= gh * causing the driver to run out of available transmit descriptors. */ -#define EM_TIDV 64 +#define EM_TIDV 65535 =20 /* * EM_TADV - Transmit Absolute Interrupt Delay Value @@ -96,7 +96,7 @@ POSSIBILITY OF SUCH DAMAGE. * along with EM_TIDV, may improve traffic throughput in specific * network conditions. */ -#define EM_TADV 64 +#define EM_TADV 65535 =20 /* * EM_RDTR - Receive Interrupt Delay Timer (Packet Timer) @@ -130,12 +130,12 @@ POSSIBILITY OF SUCH DAMAGE. * along with EM_RDTR, may improve traffic throughput in specific netw= ork * conditions. */ -#define EM_RADV 64 +#define EM_RADV 977 =20 /* * This parameter controls the duration of transmit watchdog timer. 
*/ -#define EM_TX_TIMEOUT 5 /* set to 5 seconds */ +#define EM_TX_TIMEOUT 2 /* set to 2 seconds */ =20 /* * This parameter controls when the driver calls the routine to reclaim @@ -270,15 +270,31 @@ struct adapter { struct ifmedia media; struct callout timer; struct callout tx_fifo_timer; - int watchdog_timer; + + unsigned tx_counter; + unsigned tx_counter_watchdog_mark; + unsigned tx_tdh_watchdog_mark; + int io_rid; int msi; int if_flags; - struct mtx mtx; int em_insert_vlan_header; +=09 + /* RX/TX locks */ + struct mtx rxmtx; + struct mtx txmtx; + struct task link_task; - struct task rxtx_task; - struct taskqueue *tq; /* private task queue */ + +#define RX_KTHREADS_NUM 2 + struct proc *rx_kthreads_handles[RX_KTHREADS_NUM]; + int rx_shutdown_flag; + + in_addr_t rxIpBeingProcessed[RX_KTHREADS_NUM]; + int waitedBy[RX_KTHREADS_NUM]; + int rxKthreadNo; + unsigned long reorder_cnt; + /* Management and WOL features */ int wol; int has_manage; @@ -333,7 +349,7 @@ struct adapter { uint32_t next_rx_desc_to_check; uint32_t rx_buffer_len; uint16_t num_rx_desc; - int rx_process_limit; + int rx_kthread_priority; struct em_buffer *rx_buffer_area; bus_dma_tag_t rxtag; bus_dmamap_t rx_sparemap; @@ -413,11 +429,20 @@ typedef struct _DESCRIPTOR_PAIR uint32_t elements; } DESC_ARRAY, *PDESC_ARRAY; =20 -#define EM_LOCK_INIT(_sc, _name) \ - mtx_init(&(_sc)->mtx, _name, MTX_NETWORK_LOCK, MTX_DEF) -#define EM_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->mtx) -#define EM_LOCK(_sc) mtx_lock(&(_sc)->mtx) -#define EM_UNLOCK(_sc) mtx_unlock(&(_sc)->mtx) -#define EM_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->mtx, MA_OWNED) +#define EM_RXLOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->rxmtx, _name, MTX_NETWORK_LOCK, MTX_DEF) +#define EM_RXLOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rxmtx) +#define EM_RXLOCK(_sc) mtx_lock(&(_sc)->rxmtx) +#define EM_RXTRYLOCK(_sc) mtx_trylock(&(_sc)->rxmtx) +#define EM_RXUNLOCK(_sc) mtx_unlock(&(_sc)->rxmtx) +#define EM_RXLOCK_ASSERT(_sc) mtx_assert(&(_sc)->rxmtx, MA_OWNED) + +#define 
EM_TXLOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->txmtx, _name, MTX_NETWORK_LOCK, MTX_DEF) +#define EM_TXLOCK_DESTROY(_sc) mtx_destroy(&(_sc)->txmtx) +#define EM_TXLOCK(_sc) mtx_lock(&(_sc)->txmtx) +#define EM_TXTRYLOCK(_sc) mtx_trylock(&(_sc)->txmtx) +#define EM_TXUNLOCK(_sc) mtx_unlock(&(_sc)->txmtx) +#define EM_TXLOCK_ASSERT(_sc) mtx_assert(&(_sc)->txmtx, MA_OWNED) =20 #endif /* _EM_H_DEFINED_ */ --------------060708020602090500090901-- --------------enig19778564B8230D4BDB26AB74 Content-Type: application/pgp-signature; name="signature.asc" Content-Description: OpenPGP digital signature Content-Disposition: attachment; filename="signature.asc" -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.7 (Darwin) Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org iD8DBQFHBA2DOfuToMruuMARCkW+AJ9c5eeADIOjd342XJj+h7rv/kiANACcD/g9 Lm8kC0mqO1eP/nH33NN1NAc= =rPFn -----END PGP SIGNATURE----- --------------enig19778564B8230D4BDB26AB74--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?47040D83.9010706>