From owner-svn-src-head@freebsd.org Tue Jan 26 09:42:15 2016 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 0CA8AA46036; Tue, 26 Jan 2016 09:42:15 +0000 (UTC) (envelope-from sephe@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id DB5C8C16; Tue, 26 Jan 2016 09:42:14 +0000 (UTC) (envelope-from sephe@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u0Q9gD0R062311; Tue, 26 Jan 2016 09:42:13 GMT (envelope-from sephe@FreeBSD.org) Received: (from sephe@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u0Q9gD14062309; Tue, 26 Jan 2016 09:42:13 GMT (envelope-from sephe@FreeBSD.org) Message-Id: <201601260942.u0Q9gD14062309@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: sephe set sender to sephe@FreeBSD.org using -f From: Sepherosa Ziehau Date: Tue, 26 Jan 2016 09:42:13 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r294788 - head/sys/dev/hyperv/netvsc X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 26 Jan 2016 09:42:15 -0000 Author: sephe Date: Tue Jan 26 09:42:13 2016 New Revision: 294788 URL: https://svnweb.freebsd.org/changeset/base/294788 Log: hyperv/hn: Improve sending performance - Avoid main lock contention by trylock for if_start, if that fails, schedule TX taskqueue for if_start - Don't do direct sending if the packet to be sent is large, e.g. TSO packet. This change gives me stable 9.1Gbps TCP sending performance w/ TSO over a 10Gbe directly connected network (the performance fluctuated between 4Gbps and 9Gbps before this commit). It also improves non- TSO TCP sending performance a lot. Reviewed by: adrian, royger Approved by: adrian (mentor) Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D5074 Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.h head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.h ============================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.h Tue Jan 26 09:09:20 2016 (r294787) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h Tue Jan 26 09:42:13 2016 (r294788) @@ -39,9 +39,11 @@ #define __HV_NET_VSC_H__ #include +#include #include #include #include +#include #include #include @@ -1008,7 +1010,6 @@ typedef struct hn_softc { struct hv_device *hn_dev_obj; netvsc_dev *net_dev; - int hn_txdesc_cnt; struct hn_txdesc *hn_txdesc; bus_dma_tag_t hn_tx_data_dtag; bus_dma_tag_t hn_tx_rndis_dtag; @@ -1017,9 +1018,15 @@ typedef struct hn_softc { struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; + int hn_txdesc_cnt; int hn_txdesc_avail; int hn_txeof; + int hn_direct_tx_size; + struct taskqueue *hn_tx_taskq; + struct task hn_start_task; + struct task hn_txeof_task; + struct lro_ctrl hn_lro; int hn_lro_hiwat; Modified: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c ============================================================================== --- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Tue Jan 26 09:09:20 2016 (r294787) +++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Tue Jan 26 09:42:13 2016 (r294788) @@ -146,6 +146,8 @@ __FBSDID("$FreeBSD$"); #define HN_TX_DATA_SEGCNT_MAX \ (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS) +#define HN_DIRECT_TX_SIZE_DEF 128 + struct hn_txdesc { SLIST_ENTRY(hn_txdesc) link; struct mbuf *m; @@ -194,6 +196,7 @@ struct hn_txdesc { #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) +#define NV_TRYLOCK(_sc) mtx_trylock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) @@ -219,6 +222,10 @@ TUNABLE_INT("dev.hn.tso_maxlen", &hn_tso static int hn_tx_chimney_size = 0; TUNABLE_INT("dev.hn.tx_chimney_size", &hn_tx_chimney_size); +/* Limit the size of packet for direct transmission */ +static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; +TUNABLE_INT("dev.hn.direct_tx_size", &hn_direct_tx_size); + /* * Forward declarations */ @@ -226,8 +233,9 @@ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); -static void hn_start_locked(struct ifnet *ifp); +static int hn_start_locked(struct ifnet *ifp, int len); static void hn_start(struct ifnet *ifp); +static void hn_start_txeof(struct ifnet *ifp); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #ifdef HN_LRO_HIWAT @@ -237,6 +245,8 @@ static int hn_tx_chimney_size_sysctl(SYS static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct hn_softc *sc); static void hn_destroy_tx_ring(struct hn_softc *sc); +static void hn_start_taskfunc(void *xsc, int pending); +static void hn_txeof_taskfunc(void *xsc, int pending); static __inline void hn_set_lro_hiwat(struct hn_softc *sc, int hiwat) @@ -384,6 +394,14 @@ netvsc_attach(device_t dev) sc->hn_dev = dev; sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF; sc->hn_trust_hosttcp = hn_trust_hosttcp; + sc->hn_direct_tx_size = hn_direct_tx_size; + + sc->hn_tx_taskq = taskqueue_create_fast("hn_tx", M_WAITOK, + taskqueue_thread_enqueue, &sc->hn_tx_taskq); + taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", + device_get_nameunit(dev)); + TASK_INIT(&sc->hn_start_task, 0, hn_start_taskfunc, sc); + TASK_INIT(&sc->hn_txeof_task, 0, hn_txeof_taskfunc, sc); error = hn_create_tx_ring(sc); if (error) @@ -524,6 +542,9 @@ netvsc_attach(device_t dev) SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl, "I", "Chimney send packet size limit"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "direct_tx_size", + CTLFLAG_RW, &sc->hn_direct_tx_size, 0, + "Size of the packet for direct transmission"); if (unit == 0) { struct sysctl_ctx_list *dc_ctx; @@ -548,6 +569,9 @@ netvsc_attach(device_t dev) SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tso_maxlen", CTLFLAG_RD, &hn_tso_maxlen, 0, "TSO burst limit"); #endif + SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "direct_tx_size", + CTLFLAG_RD, &hn_direct_tx_size, 0, + "Size of the packet for direct transmission"); } return (0); @@ -583,6 +607,10 @@ netvsc_detach(device_t dev) hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL); + taskqueue_drain(sc->hn_tx_taskq, &sc->hn_start_task); + taskqueue_drain(sc->hn_tx_taskq, &sc->hn_txeof_task); + taskqueue_free(sc->hn_tx_taskq); + ifmedia_removeall(&sc->hn_media); #if defined(INET) || defined(INET6) tcp_lro_free(&sc->hn_lro); @@ -733,24 +761,19 @@ void netvsc_channel_rollup(struct hv_device *device_ctx) { struct hn_softc *sc = device_get_softc(device_ctx->device); - struct ifnet *ifp; if (!sc->hn_txeof) return; sc->hn_txeof = 0; - ifp = sc->hn_ifp; - NV_LOCK(sc); - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - hn_start_locked(ifp); - NV_UNLOCK(sc); + hn_start_txeof(sc->hn_ifp); } /* * Start a transmit of one or more packets */ -static void -hn_start_locked(struct ifnet *ifp) +static int +hn_start_locked(struct ifnet *ifp, int len) { hn_softc_t *sc = ifp->if_softc; struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev); @@ -768,7 +791,7 @@ hn_start_locked(struct ifnet *ifp) if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) - return; + return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; @@ -781,11 +804,21 @@ hn_start_locked(struct ifnet *ifp) if (m_head == NULL) break; + if (len > 0 && m_head->m_pkthdr.len > len) { + /* + * This sending could be time consuming; let callers + * dispatch this packet sending (and sending of any + * following up packets) to tx taskqueue. + */ + IF_PREPEND(&ifp->if_snd, m_head); + return 1; + } + txd = hn_txdesc_get(sc); if (txd == NULL) { sc->hn_no_txdescs++; IF_PREPEND(&ifp->if_snd, m_head); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; + atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } @@ -1060,10 +1093,11 @@ again: sc->hn_send_failed++; IF_PREPEND(&ifp->if_snd, m_head); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; + atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } + return 0; } /* @@ -1555,7 +1589,8 @@ hn_stop(hn_softc_t *sc) if (bootverbose) printf(" Closing Device ...\n"); - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + atomic_clear_int(&ifp->if_drv_flags, + (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; @@ -1571,13 +1606,43 @@ hn_start(struct ifnet *ifp) hn_softc_t *sc; sc = ifp->if_softc; - NV_LOCK(sc); - if (sc->temp_unusable) { + if (NV_TRYLOCK(sc)) { + int sched; + + sched = hn_start_locked(ifp, sc->hn_direct_tx_size); NV_UNLOCK(sc); - return; + if (!sched) + return; + } + taskqueue_enqueue_fast(sc->hn_tx_taskq, &sc->hn_start_task); +} + +static void +hn_start_txeof(struct ifnet *ifp) +{ + hn_softc_t *sc; + + sc = ifp->if_softc; + if (NV_TRYLOCK(sc)) { + int sched; + + atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); + sched = hn_start_locked(ifp, sc->hn_direct_tx_size); + NV_UNLOCK(sc); + if (sched) { + taskqueue_enqueue_fast(sc->hn_tx_taskq, + &sc->hn_start_task); + } + } else { + /* + * Release the OACTIVE earlier, with the hope, that + * others could catch up. The task will clear the + * flag again with the NV_LOCK to avoid possible + * races. + */ + atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); + taskqueue_enqueue_fast(sc->hn_tx_taskq, &sc->hn_txeof_task); } - hn_start_locked(ifp); - NV_UNLOCK(sc); } /* @@ -1604,8 +1669,8 @@ hn_ifinit_locked(hn_softc_t *sc) } else { sc->hn_initdone = 1; } - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); + atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } @@ -1907,6 +1972,28 @@ hn_destroy_tx_ring(struct hn_softc *sc) mtx_destroy(&sc->hn_txlist_spin); } +static void +hn_start_taskfunc(void *xsc, int pending __unused) +{ + struct hn_softc *sc = xsc; + + NV_LOCK(sc); + hn_start_locked(sc->hn_ifp, 0); + NV_UNLOCK(sc); +} + +static void +hn_txeof_taskfunc(void *xsc, int pending __unused) +{ + struct hn_softc *sc = xsc; + struct ifnet *ifp = sc->hn_ifp; + + NV_LOCK(sc); + atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); + hn_start_locked(ifp, 0); + NV_UNLOCK(sc); +} + static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe),