From owner-svn-src-stable@freebsd.org Mon Jun 13 08:03:54 2016 Return-Path: Delivered-To: svn-src-stable@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id D4031AF153B; Mon, 13 Jun 2016 08:03:54 +0000 (UTC) (envelope-from sephe@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 96CA62804; Mon, 13 Jun 2016 08:03:54 +0000 (UTC) (envelope-from sephe@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u5D83rCA099038; Mon, 13 Jun 2016 08:03:53 GMT (envelope-from sephe@FreeBSD.org) Received: (from sephe@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u5D83rBm099036; Mon, 13 Jun 2016 08:03:53 GMT (envelope-from sephe@FreeBSD.org) Message-Id: <201606130803.u5D83rBm099036@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: sephe set sender to sephe@FreeBSD.org using -f From: Sepherosa Ziehau Date: Mon, 13 Jun 2016 08:03:53 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r301862 - stable/10/sys/dev/hyperv/netvsc X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 13 Jun 2016 08:03:54 -0000 Author: sephe Date: Mon Jun 13 08:03:53 2016 New Revision: 301862 URL: https://svnweb.freebsd.org/changeset/base/301862 Log: MFC 295748,295792,295793,295794 295748 hyperv/hn: Use buf_ring for txdesc list 
So one spinlock is avoided, which would be potentially dangerous for a virtual machine, if the spinlock holder was scheduled out by the host, as noted by royger. Old spinlock based txdesc list is still kept around, so we could have a safe fallback. No performance regression nor improvement is observed. Reviewed by: adrian Approved by: adrian (mentor) MFC after: 1 week Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D5290 295792 hyperv/hn: Add option to bind TX taskqueues to the specified CPU It will be used to help track the host-side transmission ring selection issue; and it will be turned on by default, once we have concrete results. Reviewed by: adrian, Jun Su Approved by: adrian (mentor) MFC after: 1 week Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D5316 295793 hyperv/hn: Enable IP header checksum offloading for WIN8 (WinServ2012) Tested on Windows Server 2012. Reviewed by: adrian Approved by: adrian (mentor) MFC after: 1 week Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D5317 295794 hyperv/hn: Free the txdesc buf_ring when the TX ring is destroyed Reviewed by: adrian Approved by: adrian (mentor) MFC after: 1 week Sponsored by: Microsoft OSTC Differential Revision: https://reviews.freebsd.org/D5318 Modified: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Directory Properties: stable/10/ (props changed) Modified: stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h ============================================================================== --- stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h Mon Jun 13 07:30:54 2016 (r301861) +++ stable/10/sys/dev/hyperv/netvsc/hv_net_vsc.h Mon Jun 13 08:03:53 2016 (r301862) @@ -58,6 +58,8 @@ #include +#define HN_USE_TXDESC_BUFRING + MALLOC_DECLARE(M_NETVSC); #define NVSP_INVALID_PROTOCOL_VERSION (0xFFFFFFFF) @@ -990,8 +992,12 @@ typedef struct { hv_bool_uint8_t link_state; } 
netvsc_device_info; +#ifndef HN_USE_TXDESC_BUFRING struct hn_txdesc; SLIST_HEAD(hn_txdesc_list, hn_txdesc); +#else +struct buf_ring; +#endif struct hn_rx_ring { struct lro_ctrl hn_lro; @@ -1012,8 +1018,12 @@ struct hn_rx_ring { #define HN_TRUST_HCSUM_UDP 0x0004 struct hn_tx_ring { +#ifndef HN_USE_TXDESC_BUFRING struct mtx hn_txlist_spin; struct hn_txdesc_list hn_txlist; +#else + struct buf_ring *hn_txdesc_br; +#endif int hn_txdesc_cnt; int hn_txdesc_avail; int hn_txeof; Modified: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c ============================================================================== --- stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Mon Jun 13 07:30:54 2016 (r301861) +++ stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Mon Jun 13 08:03:53 2016 (r301862) @@ -66,10 +66,12 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include +#include #include #include @@ -151,7 +153,9 @@ __FBSDID("$FreeBSD$"); #define HN_DIRECT_TX_SIZE_DEF 128 struct hn_txdesc { +#ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; +#endif struct mbuf *m; struct hn_tx_ring *txr; int refs; @@ -173,7 +177,7 @@ struct hn_txdesc { * later. UDP checksum offloading doesn't work on earlier * Windows releases. 
*/ -#define HN_CSUM_ASSIST_WIN8 (CSUM_TCP) +#define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) @@ -258,6 +262,18 @@ SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_ta static struct taskqueue *hn_tx_taskq; +#ifndef HN_USE_TXDESC_BUFRING +static int hn_use_txdesc_bufring = 0; +#else +static int hn_use_txdesc_bufring = 1; +#endif +SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, + &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); + +static int hn_bind_tx_taskq = -1; +SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, + &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); + /* * Forward declarations */ @@ -343,6 +359,19 @@ netvsc_probe(device_t dev) return (ENXIO); } +static void +hn_cpuset_setthread_task(void *xmask, int pending __unused) +{ + cpuset_t *mask = xmask; + int error; + + error = cpuset_setthread(curthread->td_tid, mask); + if (error) { + panic("curthread=%ju: can't pin; error=%d", + (uintmax_t)curthread->td_tid, error); + } +} + /* * Standard attach entry point. 
* @@ -376,6 +405,19 @@ netvsc_attach(device_t dev) taskqueue_thread_enqueue, &sc->hn_tx_taskq); taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); + if (hn_bind_tx_taskq >= 0) { + int cpu = hn_bind_tx_taskq; + struct task cpuset_task; + cpuset_t cpu_set; + + if (cpu > mp_ncpus - 1) + cpu = mp_ncpus - 1; + CPU_SETOF(cpu, &cpu_set); + TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, + &cpu_set); + taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task); + taskqueue_drain(sc->hn_tx_taskq, &cpuset_task); + } } else { sc->hn_tx_taskq = hn_tx_taskq; } @@ -572,6 +614,7 @@ hn_txdesc_put(struct hn_tx_ring *txr, st txd->flags |= HN_TXD_FLAG_ONLIST; +#ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, @@ -579,6 +622,10 @@ hn_txdesc_put(struct hn_tx_ring *txr, st txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); +#else + atomic_add_int(&txr->hn_txdesc_avail, 1); + buf_ring_enqueue(txr->hn_txdesc_br, txd); +#endif return 1; } @@ -588,6 +635,7 @@ hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; +#ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { @@ -597,8 +645,14 @@ hn_txdesc_get(struct hn_tx_ring *txr) SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); +#else + txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); +#endif if (txd != NULL) { +#ifdef HN_USE_TXDESC_BUFRING + atomic_subtract_int(&txr->hn_txdesc_avail, 1); +#endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; @@ -2061,13 +2115,20 @@ hn_create_tx_ring(struct hn_softc *sc, i txr->hn_sc = sc; +#ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); +#endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); 
txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); +#ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); +#else + txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, + M_WAITOK, &txr->hn_tx_lock); +#endif txr->hn_tx_taskq = sc->hn_tx_taskq; TASK_INIT(&txr->hn_start_task, 0, hn_start_taskfunc, txr); @@ -2171,7 +2232,11 @@ hn_create_tx_ring(struct hn_softc *sc, i /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; +#ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); +#else + buf_ring_enqueue(txr->hn_txdesc_br, txd); +#endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; @@ -2204,6 +2269,20 @@ hn_create_tx_ring(struct hn_softc *sc, i } static void +hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) +{ + struct hn_tx_ring *txr = txd->txr; + + KASSERT(txd->m == NULL, ("still has mbuf installed")); + KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); + + bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_msg_dmap); + bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_msg, + txd->rndis_msg_dmap); + bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); +} + +static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; @@ -2211,28 +2290,31 @@ hn_destroy_tx_ring(struct hn_tx_ring *tx if (txr->hn_txdesc == NULL) return; +#ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { - KASSERT(txd->m == NULL, ("still has mbuf installed")); - KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, - ("still dma mapped")); SLIST_REMOVE_HEAD(&txr->hn_txlist, link); - - bus_dmamap_unload(txr->hn_tx_rndis_dtag, - txd->rndis_msg_dmap); - bus_dmamem_free(txr->hn_tx_rndis_dtag, - txd->rndis_msg, txd->rndis_msg_dmap); - - bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); + hn_txdesc_dmamap_destroy(txd); } +#else + while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) + 
hn_txdesc_dmamap_destroy(txd); +#endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); + +#ifdef HN_USE_TXDESC_BUFRING + buf_ring_free(txr->hn_txdesc_br, M_NETVSC); +#endif + free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; +#ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); +#endif mtx_destroy(&txr->hn_tx_lock); } @@ -2377,6 +2459,18 @@ hn_tx_taskq_create(void *arg __unused) hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); + if (hn_bind_tx_taskq >= 0) { + int cpu = hn_bind_tx_taskq; + struct task cpuset_task; + cpuset_t cpu_set; + + if (cpu > mp_ncpus - 1) + cpu = mp_ncpus - 1; + CPU_SETOF(cpu, &cpu_set); + TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set); + taskqueue_enqueue(hn_tx_taskq, &cpuset_task); + taskqueue_drain(hn_tx_taskq, &cpuset_task); + } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL);