From owner-svn-src-all@freebsd.org Thu Feb 18 04:59:39 2016 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 0F75EAAC8F7; Thu, 18 Feb 2016 04:59:39 +0000 (UTC) (envelope-from sephe@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id DE0821546; Thu, 18 Feb 2016 04:59:38 +0000 (UTC) (envelope-from sephe@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u1I4xbDT012423; Thu, 18 Feb 2016 04:59:37 GMT (envelope-from sephe@FreeBSD.org) Received: (from sephe@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u1I4xbkk012421; Thu, 18 Feb 2016 04:59:37 GMT (envelope-from sephe@FreeBSD.org) Message-Id: <201602180459.u1I4xbkk012421@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: sephe set sender to sephe@FreeBSD.org using -f From: Sepherosa Ziehau Date: Thu, 18 Feb 2016 04:59:37 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r295740 - head/sys/dev/hyperv/netvsc X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 18 Feb 2016 04:59:39 -0000 Author: sephe Date: Thu Feb 18 04:59:37 2016 New Revision: 295740 URL: https://svnweb.freebsd.org/changeset/base/295740 Log: hyperv/hn: Set the TCP ACK/data segment aggregation limit Set TCP ACK append limit to 1, i.e. aggregate 2 ACKs at most. Aggregating anything more than 2 hurts TCP sending performance in hyperv. This significantly improves the TCP sending performance when the number of concurrent connetion is low (2~8). And it greatly stabilizes the TCP sending performance in other cases. Set TCP data segments aggregation length limit to 37500. Without this limitation, hn(4) could aggregate ~45 TCP data segments for each connection (even at 64 or more connections) before dispatching them to socket code; large aggregation slows down ACK sending and eventually hurts/destabilizes TCP reception performance. This setting stabilizes and improves TCP reception performance for >4 concurrent connections significantly. Make them sysctls so they could be adjusted. Reviewed by: adrian, gallatin (previous version), hselasky (previous version) Approved by: adrian (mentor) MFC after: 1 week Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D5185 Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.h head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Modified: head/sys/dev/hyperv/netvsc/hv_net_vsc.h ============================================================================== --- head/sys/dev/hyperv/netvsc/hv_net_vsc.h Thu Feb 18 04:58:34 2016 (r295739) +++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h Thu Feb 18 04:59:37 2016 (r295740) @@ -1030,7 +1030,6 @@ typedef struct hn_softc { struct task hn_txeof_task; struct lro_ctrl hn_lro; - int hn_lro_hiwat; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ Modified: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c ============================================================================== --- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Thu Feb 18 04:58:34 2016 (r295739) +++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Thu Feb 18 04:59:37 2016 (r295740) @@ -176,14 +176,11 @@ struct hn_txdesc { #define HN_CSUM_ASSIST_WIN8 (CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) -/* XXX move to netinet/tcp_lro.h */ -#define HN_LRO_HIWAT_MAX 65535 -#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX +#define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ -#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu) -#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \ - ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \ - (hiwat) <= HN_LRO_HIWAT_MAX) +#define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) + +#define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when @@ -253,9 +250,8 @@ static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct ifnet *ifp); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); -#ifdef HN_LRO_HIWAT -static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS); -#endif +static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); @@ -265,15 +261,6 @@ static void hn_start_taskfunc(void *xsc, static void hn_txeof_taskfunc(void *xsc, int pending); static int hn_encap(struct hn_softc *, struct hn_txdesc *, struct mbuf **); -static __inline void -hn_set_lro_hiwat(struct hn_softc *sc, int hiwat) -{ - sc->hn_lro_hiwat = hiwat; -#ifdef HN_LRO_HIWAT - sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; -#endif -} - static int hn_ifmedia_upd(struct ifnet *ifp __unused) { @@ -358,7 +345,6 @@ netvsc_attach(device_t dev) bzero(sc, sizeof(hn_softc_t)); sc->hn_unit = unit; sc->hn_dev = dev; - sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF; sc->hn_direct_tx_size = hn_direct_tx_size; if (hn_trust_hosttcp) sc->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; @@ -442,9 +428,8 @@ netvsc_attach(device_t dev) /* Driver private LRO settings */ sc->hn_lro.ifp = ifp; #endif -#ifdef HN_LRO_HIWAT - sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat; -#endif + sc->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; + sc->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif /* INET || INET6 */ #if __FreeBSD_version >= 1100045 @@ -480,11 +465,12 @@ netvsc_attach(device_t dev) CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried", CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries"); -#ifdef HN_LRO_HIWAT - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat", - CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl, - "I", "LRO high watermark"); -#endif + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", + CTLTYPE_UINT | CTLFLAG_RW, sc, 0, hn_lro_lenlim_sysctl, "IU", + "Max # of data bytes to be aggregated by LRO"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", + CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_ackcnt_sysctl, "I", + "Max # of ACKs to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", @@ -1410,12 +1396,13 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; + /* - * Make sure that LRO high watermark is still valid, - * after MTU change (the 2*MTU limit). + * Make sure that LRO aggregation length limit is still + * valid, after the MTU change. */ - if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat)) - hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp)); + if (sc->hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) + sc->hn_lro.lro_length_lim = HN_LRO_LENLIM_MIN(ifp); do { NV_LOCK(sc); @@ -1722,26 +1709,51 @@ hn_watchdog(struct ifnet *ifp) } #endif -#ifdef HN_LRO_HIWAT static int -hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS) +hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; - int hiwat, error; + unsigned int lenlim; + int error; - hiwat = sc->hn_lro_hiwat; - error = sysctl_handle_int(oidp, &hiwat, 0, req); + lenlim = sc->hn_lro.lro_length_lim; + error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; - if (!HN_LRO_HIWAT_ISVALID(sc, hiwat)) + if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || + lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; - if (sc->hn_lro_hiwat != hiwat) - hn_set_lro_hiwat(sc, hiwat); + sc->hn_lro.lro_length_lim = lenlim; + return 0; +} + +static int +hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int ackcnt, error; + + /* + * lro_ackcnt_lim is append count limit, + * +1 to turn it into aggregation limit. + */ + ackcnt = sc->hn_lro.lro_ackcnt_lim + 1; + error = sysctl_handle_int(oidp, &ackcnt, 0, req); + if (error || req->newptr == NULL) + return error; + + if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) + return EINVAL; + + /* + * Convert aggregation limit back to append + * count limit. + */ + sc->hn_lro.lro_ackcnt_lim = ackcnt - 1; return 0; } -#endif /* HN_LRO_HIWAT */ static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)