Date: Fri, 11 Nov 2016 07:25:14 +0000 (UTC) From: Sepherosa Ziehau <sephe@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r308511 - in stable/10/sys: conf dev/hyperv/netvsc modules/hyperv/netvsc Message-ID: <201611110725.uAB7PEvt087134@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: sephe Date: Fri Nov 11 07:25:14 2016 New Revision: 308511 URL: https://svnweb.freebsd.org/changeset/base/308511 Log: MFC 308163 hyperv/hn: Rename cleaned up file. Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D8390 Added: stable/10/sys/dev/hyperv/netvsc/if_hn.c - copied unchanged from r308510, stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Deleted: stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c Modified: stable/10/sys/conf/files.amd64 stable/10/sys/conf/files.i386 stable/10/sys/modules/hyperv/netvsc/Makefile Directory Properties: stable/10/ (props changed) Modified: stable/10/sys/conf/files.amd64 ============================================================================== --- stable/10/sys/conf/files.amd64 Fri Nov 11 07:13:17 2016 (r308510) +++ stable/10/sys/conf/files.amd64 Fri Nov 11 07:25:14 2016 (r308511) @@ -263,7 +263,7 @@ dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/netvsc/hn_nvs.c optional hyperv dev/hyperv/netvsc/hn_rndis.c optional hyperv -dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c optional hyperv +dev/hyperv/netvsc/if_hn.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_heartbeat.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv Modified: stable/10/sys/conf/files.i386 ============================================================================== --- stable/10/sys/conf/files.i386 Fri Nov 11 07:13:17 2016 (r308510) +++ stable/10/sys/conf/files.i386 Fri Nov 11 07:25:14 2016 (r308511) @@ -240,7 +240,7 @@ dev/hwpmc/hwpmc_tsc.c optional hwpmc dev/hwpmc/hwpmc_x86.c optional hwpmc dev/hyperv/netvsc/hn_nvs.c optional hyperv dev/hyperv/netvsc/hn_rndis.c optional hyperv -dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c optional hyperv +dev/hyperv/netvsc/if_hn.c optional hyperv dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c optional hyperv dev/hyperv/utilities/hv_heartbeat.c optional hyperv dev/hyperv/utilities/hv_kvp.c optional hyperv Copied: stable/10/sys/dev/hyperv/netvsc/if_hn.c (from r308510, stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c) ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ stable/10/sys/dev/hyperv/netvsc/if_hn.c Fri Nov 11 07:25:14 2016 (r308511, copy of r308510, stable/10/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c) @@ -0,0 +1,4685 @@ +/*- + * Copyright (c) 2010-2012 Citrix Inc. + * Copyright (c) 2009-2012,2016 Microsoft Corp. + * Copyright (c) 2012 NetApp Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/*- + * Copyright (c) 2004-2006 Kip Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_inet6.h" +#include "opt_inet.h" + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/limits.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/lock.h> +#include <sys/smp.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/sx.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <sys/taskqueue.h> +#include <sys/buf_ring.h> + +#include <machine/atomic.h> +#include <machine/in_cksum.h> + +#include <net/bpf.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <net/if_arp.h> +#include <net/if_media.h> +#include <net/if_types.h> +#include <net/if_var.h> +#include <net/if_vlan_var.h> +#include <net/rndis.h> + +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/tcp_lro.h> +#include <netinet/udp.h> + +#include <dev/hyperv/include/hyperv.h> +#include <dev/hyperv/include/hyperv_busdma.h> +#include <dev/hyperv/include/vmbus.h> +#include <dev/hyperv/include/vmbus_xact.h> + +#include <dev/hyperv/netvsc/ndis.h> +#include <dev/hyperv/netvsc/if_hnreg.h> +#include <dev/hyperv/netvsc/if_hnvar.h> +#include <dev/hyperv/netvsc/hn_nvs.h> +#include <dev/hyperv/netvsc/hn_rndis.h> + +#include "vmbus_if.h" + +#define HN_RING_CNT_DEF_MAX 8 + +/* YYY should get it from the underlying channel */ +#define HN_TX_DESC_CNT 512 + +#define HN_RNDIS_PKT_LEN \ + (sizeof(struct rndis_packet_msg) + \ + HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ + HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ + HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ + HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) +#define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE +#define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE + +#define HN_TX_DATA_BOUNDARY PAGE_SIZE +#define HN_TX_DATA_MAXSIZE IP_MAXPACKET +#define HN_TX_DATA_SEGSIZE PAGE_SIZE +/* -1 for RNDIS packet message */ +#define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1) + +#define HN_DIRECT_TX_SIZE_DEF 128 + +#define HN_EARLY_TXEOF_THRESH 8 + +#define HN_PKTBUF_LEN_DEF (16 * 1024) + +#define HN_LROENT_CNT_DEF 128 + +#define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) +#define HN_LRO_LENLIM_DEF (25 * ETHERMTU) +/* YYY 2*MTU is a bit rough, but should be good enough. */ +#define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) + +#define HN_LRO_ACKCNT_DEF 1 + +#define HN_LOCK_INIT(sc) \ + sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev)) +#define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock) +#define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED) +#define HN_LOCK(sc) sx_xlock(&(sc)->hn_lock) +#define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock) + +#define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP) +#define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP) +#define HN_CSUM_IP_HWASSIST(sc) \ + ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK) +#define HN_CSUM_IP6_HWASSIST(sc) \ + ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK) + +struct hn_txdesc { +#ifndef HN_USE_TXDESC_BUFRING + SLIST_ENTRY(hn_txdesc) link; +#endif + struct mbuf *m; + struct hn_tx_ring *txr; + int refs; + uint32_t flags; /* HN_TXD_FLAG_ */ + struct hn_nvs_sendctx send_ctx; + uint32_t chim_index; + int chim_size; + + bus_dmamap_t data_dmap; + + bus_addr_t rndis_pkt_paddr; + struct rndis_packet_msg *rndis_pkt; + bus_dmamap_t rndis_pkt_dmap; +}; + +#define HN_TXD_FLAG_ONLIST 0x0001 +#define HN_TXD_FLAG_DMAMAP 0x0002 + +struct hn_rxinfo { + uint32_t vlan_info; + uint32_t csum_info; + uint32_t hash_info; + uint32_t hash_value; +}; + +#define HN_RXINFO_VLAN 0x0001 +#define HN_RXINFO_CSUM 0x0002 +#define HN_RXINFO_HASHINF 0x0004 +#define HN_RXINFO_HASHVAL 0x0008 +#define HN_RXINFO_ALL \ + (HN_RXINFO_VLAN | \ + HN_RXINFO_CSUM | \ + HN_RXINFO_HASHINF | \ + HN_RXINFO_HASHVAL) + +#define HN_NDIS_VLAN_INFO_INVALID 0xffffffff +#define HN_NDIS_RXCSUM_INFO_INVALID 0 +#define HN_NDIS_HASH_INFO_INVALID 0 + +static int hn_probe(device_t); +static int hn_attach(device_t); +static int hn_detach(device_t); +static int hn_shutdown(device_t); +static void hn_chan_callback(struct vmbus_channel *, + void *); + +static void hn_init(void *); +static int hn_ioctl(struct ifnet *, u_long, caddr_t); +static void hn_start(struct ifnet *); +static int hn_transmit(struct ifnet *, struct mbuf *); +static void hn_xmit_qflush(struct ifnet *); +static int hn_ifmedia_upd(struct ifnet *); +static void hn_ifmedia_sts(struct ifnet *, + struct ifmediareq *); + +static int hn_rndis_rxinfo(const void *, int, + struct hn_rxinfo *); +static void hn_rndis_rx_data(struct hn_rx_ring *, + const void *, int); +static void hn_rndis_rx_status(struct hn_softc *, + const void *, int); + +static void hn_nvs_handle_notify(struct hn_softc *, + const struct vmbus_chanpkt_hdr *); +static void hn_nvs_handle_comp(struct hn_softc *, + struct vmbus_channel *, + const struct vmbus_chanpkt_hdr *); +static void hn_nvs_handle_rxbuf(struct hn_rx_ring *, + struct vmbus_channel *, + const struct vmbus_chanpkt_hdr *); +static void hn_nvs_ack_rxbuf(struct hn_rx_ring *, + struct vmbus_channel *, uint64_t); + +#if __FreeBSD_version >= 1100099 +static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); +#endif +static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); +#if __FreeBSD_version < 1100095 +static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS); +#else +static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); +#endif +static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS); + +static void hn_stop(struct hn_softc *); +static void hn_init_locked(struct hn_softc *); +static int hn_chan_attach(struct hn_softc *, + struct vmbus_channel *); +static void hn_chan_detach(struct hn_softc *, + struct vmbus_channel *); +static int hn_attach_subchans(struct hn_softc *); +static void hn_detach_allchans(struct hn_softc *); +static void hn_chan_rollup(struct hn_rx_ring *, + struct hn_tx_ring *); +static void hn_set_ring_inuse(struct hn_softc *, int); +static int hn_synth_attach(struct hn_softc *, int); +static void hn_synth_detach(struct hn_softc *); +static int hn_synth_alloc_subchans(struct hn_softc *, + int *); +static void hn_suspend(struct hn_softc *); +static void hn_suspend_data(struct hn_softc *); +static void hn_suspend_mgmt(struct hn_softc *); +static void hn_resume(struct hn_softc *); +static void hn_resume_data(struct hn_softc *); +static void hn_resume_mgmt(struct hn_softc *); +static void hn_suspend_mgmt_taskfunc(void *, int); +static void hn_chan_drain(struct vmbus_channel *); + +static void hn_update_link_status(struct hn_softc *); +static void hn_change_network(struct hn_softc *); +static void hn_link_taskfunc(void *, int); +static void hn_netchg_init_taskfunc(void *, int); +static void hn_netchg_status_taskfunc(void *, int); +static void hn_link_status(struct hn_softc *); + +static int hn_create_rx_data(struct hn_softc *, int); +static void hn_destroy_rx_data(struct hn_softc *); +static int hn_check_iplen(const struct mbuf *, int); +static int hn_set_rxfilter(struct hn_softc *); +static int hn_rss_reconfig(struct hn_softc *); +static void hn_rss_ind_fixup(struct hn_softc *, int); +static int hn_rxpkt(struct hn_rx_ring *, const void *, + int, const struct hn_rxinfo *); + +static int hn_tx_ring_create(struct hn_softc *, int); +static void hn_tx_ring_destroy(struct hn_tx_ring *); +static int hn_create_tx_data(struct hn_softc *, int); +static void hn_fixup_tx_data(struct hn_softc *); +static void hn_destroy_tx_data(struct hn_softc *); +static void hn_txdesc_dmamap_destroy(struct hn_txdesc *); +static int hn_encap(struct hn_tx_ring *, + struct hn_txdesc *, struct mbuf **); +static int hn_txpkt(struct ifnet *, struct hn_tx_ring *, + struct hn_txdesc *); +static void hn_set_chim_size(struct hn_softc *, int); +static void hn_set_tso_maxsize(struct hn_softc *, int, int); +static bool hn_tx_ring_pending(struct hn_tx_ring *); +static void hn_tx_ring_qflush(struct hn_tx_ring *); +static void hn_resume_tx(struct hn_softc *, int); +static int hn_get_txswq_depth(const struct hn_tx_ring *); +static void hn_txpkt_done(struct hn_nvs_sendctx *, + struct hn_softc *, struct vmbus_channel *, + const void *, int); +static int hn_txpkt_sglist(struct hn_tx_ring *, + struct hn_txdesc *); +static int hn_txpkt_chim(struct hn_tx_ring *, + struct hn_txdesc *); +static int hn_xmit(struct hn_tx_ring *, int); +static void hn_xmit_taskfunc(void *, int); +static void hn_xmit_txeof(struct hn_tx_ring *); +static void hn_xmit_txeof_taskfunc(void *, int); +static int hn_start_locked(struct hn_tx_ring *, int); +static void hn_start_taskfunc(void *, int); +static void hn_start_txeof(struct hn_tx_ring *); +static void hn_start_txeof_taskfunc(void *, int); + +SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, + "Hyper-V network interface"); + +/* Trust tcp segements verification on host side. */ +static int hn_trust_hosttcp = 1; +SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, + &hn_trust_hosttcp, 0, + "Trust tcp segement verification on host side, " + "when csum info is missing (global setting)"); + +/* Trust udp datagrams verification on host side. */ +static int hn_trust_hostudp = 1; +SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, + &hn_trust_hostudp, 0, + "Trust udp datagram verification on host side, " + "when csum info is missing (global setting)"); + +/* Trust ip packets verification on host side. */ +static int hn_trust_hostip = 1; +SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, + &hn_trust_hostip, 0, + "Trust ip packet verification on host side, " + "when csum info is missing (global setting)"); + +/* Limit TSO burst size */ +static int hn_tso_maxlen = IP_MAXPACKET; +SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, + &hn_tso_maxlen, 0, "TSO burst limit"); + +/* Limit chimney send size */ +static int hn_tx_chimney_size = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, + &hn_tx_chimney_size, 0, "Chimney send packet size limit"); + +/* Limit the size of packet for direct transmission */ +static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; +SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, + &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); + +/* # of LRO entries per RX ring */ +#if defined(INET) || defined(INET6) +#if __FreeBSD_version >= 1100095 +static int hn_lro_entry_count = HN_LROENT_CNT_DEF; +SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, + &hn_lro_entry_count, 0, "LRO entry count"); +#endif +#endif + +/* Use shared TX taskqueue */ +static int hn_share_tx_taskq = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, + &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); + +#ifndef HN_USE_TXDESC_BUFRING +static int hn_use_txdesc_bufring = 0; +#else +static int hn_use_txdesc_bufring = 1; +#endif +SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, + &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); + +/* Bind TX taskqueue to the target CPU */ +static int hn_bind_tx_taskq = -1; +SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, + &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); + +/* Use ifnet.if_start instead of ifnet.if_transmit */ +static int hn_use_if_start = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, + &hn_use_if_start, 0, "Use if_start TX method"); + +/* # of channels to use */ +static int hn_chan_cnt = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, + &hn_chan_cnt, 0, + "# of channels to use; each channel has one RX ring and one TX ring"); + +/* # of transmit rings to use */ +static int hn_tx_ring_cnt = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, + &hn_tx_ring_cnt, 0, "# of TX rings to use"); + +/* Software TX ring deptch */ +static int hn_tx_swq_depth = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, + &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); + +/* Enable sorted LRO, and the depth of the per-channel mbuf queue */ +#if __FreeBSD_version >= 1100095 +static u_int hn_lro_mbufq_depth = 0; +SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, + &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); +#endif + +static u_int hn_cpu_index; /* next CPU for channel */ +static struct taskqueue *hn_tx_taskq; /* shared TX taskqueue */ + +static const uint8_t +hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = { + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa +}; + +static device_method_t hn_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, hn_probe), + DEVMETHOD(device_attach, hn_attach), + DEVMETHOD(device_detach, hn_detach), + DEVMETHOD(device_shutdown, hn_shutdown), + DEVMETHOD_END +}; + +static driver_t hn_driver = { + "hn", + hn_methods, + sizeof(struct hn_softc) +}; + +static devclass_t hn_devclass; + +DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0); +MODULE_VERSION(hn, 1); +MODULE_DEPEND(hn, vmbus, 1, 1, 1); + +#if __FreeBSD_version >= 1100099 +static void +hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) +{ + int i; + + for (i = 0; i < sc->hn_rx_ring_inuse; ++i) + sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; +} +#endif + +static int +hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd) +{ + + KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID && + txd->chim_size == 0, ("invalid rndis sglist txd")); + return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, + &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt)); +} + +static int +hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd) +{ + struct hn_nvs_rndis rndis; + + KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID && + txd->chim_size > 0, ("invalid rndis chim txd")); + + rndis.nvs_type = HN_NVS_TYPE_RNDIS; + rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA; + rndis.nvs_chim_idx = txd->chim_index; + rndis.nvs_chim_sz = txd->chim_size; + + return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC, + &rndis, sizeof(rndis), &txd->send_ctx)); +} + +static __inline uint32_t +hn_chim_alloc(struct hn_softc *sc) +{ + int i, bmap_cnt = sc->hn_chim_bmap_cnt; + u_long *bmap = sc->hn_chim_bmap; + uint32_t ret = HN_NVS_CHIM_IDX_INVALID; + + for (i = 0; i < bmap_cnt; ++i) { + int idx; + + idx = ffsl(~bmap[i]); + if (idx == 0) + continue; + + --idx; /* ffsl is 1-based */ + KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt, + ("invalid i %d and idx %d", i, idx)); + + if (atomic_testandset_long(&bmap[i], idx)) + continue; + + ret = i * LONG_BIT + idx; + break; + } + return (ret); +} + +static __inline void +hn_chim_free(struct hn_softc *sc, uint32_t chim_idx) +{ + u_long mask; + uint32_t idx; + + idx = chim_idx / LONG_BIT; + KASSERT(idx < sc->hn_chim_bmap_cnt, + ("invalid chimney index 0x%x", chim_idx)); + + mask = 1UL << (chim_idx % LONG_BIT); + KASSERT(sc->hn_chim_bmap[idx] & mask, + ("index bitmap 0x%lx, chimney index %u, " + "bitmap idx %d, bitmask 0x%lx", + sc->hn_chim_bmap[idx], chim_idx, idx, mask)); + + atomic_clear_long(&sc->hn_chim_bmap[idx], mask); +} + +static int +hn_set_rxfilter(struct hn_softc *sc) +{ + struct ifnet *ifp = sc->hn_ifp; + uint32_t filter; + int error = 0; + + HN_LOCK_ASSERT(sc); + + if (ifp->if_flags & IFF_PROMISC) { + filter = NDIS_PACKET_TYPE_PROMISCUOUS; + } else { + filter = NDIS_PACKET_TYPE_DIRECTED; + if (ifp->if_flags & IFF_BROADCAST) + filter |= NDIS_PACKET_TYPE_BROADCAST; +#ifdef notyet + /* + * See the comment in SIOCADDMULTI/SIOCDELMULTI. + */ + /* TODO: support multicast list */ + if ((ifp->if_flags & IFF_ALLMULTI) || + !TAILQ_EMPTY(&ifp->if_multiaddrs)) + filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; +#else + /* Always enable ALLMULTI */ + filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; +#endif + } + + if (sc->hn_rx_filter != filter) { + error = hn_rndis_set_rxfilter(sc, filter); + if (!error) + sc->hn_rx_filter = filter; + } + return (error); +} + +static int +hn_get_txswq_depth(const struct hn_tx_ring *txr) +{ + + KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); + if (hn_tx_swq_depth < txr->hn_txdesc_cnt) + return txr->hn_txdesc_cnt; + return hn_tx_swq_depth; +} + +static int +hn_rss_reconfig(struct hn_softc *sc) +{ + int error; + + HN_LOCK_ASSERT(sc); + + if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) + return (ENXIO); + + /* + * Disable RSS first. + * + * NOTE: + * Direct reconfiguration by setting the UNCHG flags does + * _not_ work properly. + */ + if (bootverbose) + if_printf(sc->hn_ifp, "disable RSS\n"); + error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE); + if (error) { + if_printf(sc->hn_ifp, "RSS disable failed\n"); + return (error); + } + + /* + * Reenable the RSS w/ the updated RSS key or indirect + * table. + */ + if (bootverbose) + if_printf(sc->hn_ifp, "reconfig RSS\n"); + error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE); + if (error) { + if_printf(sc->hn_ifp, "RSS reconfig failed\n"); + return (error); + } + return (0); +} + +static void +hn_rss_ind_fixup(struct hn_softc *sc, int nchan) +{ + struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; + int i; + + KASSERT(nchan > 1, ("invalid # of channels %d", nchan)); + + /* + * Check indirect table to make sure that all channels in it + * can be used. + */ + for (i = 0; i < NDIS_HASH_INDCNT; ++i) { + if (rss->rss_ind[i] >= nchan) { + if_printf(sc->hn_ifp, + "RSS indirect table %d fixup: %u -> %d\n", + i, rss->rss_ind[i], nchan - 1); + rss->rss_ind[i] = nchan - 1; + } + } +} + +static int +hn_ifmedia_upd(struct ifnet *ifp __unused) +{ + + return EOPNOTSUPP; +} + +static void +hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) +{ + struct hn_softc *sc = ifp->if_softc; + + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + + if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) { + ifmr->ifm_active |= IFM_NONE; + return; + } + ifmr->ifm_status |= IFM_ACTIVE; + ifmr->ifm_active |= IFM_10G_T | IFM_FDX; +} + +/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ +static const struct hyperv_guid g_net_vsc_device_type = { + .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, + 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} +}; + +static int +hn_probe(device_t dev) +{ + + if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, + &g_net_vsc_device_type) == 0) { + device_set_desc(dev, "Hyper-V Network Interface"); + return BUS_PROBE_DEFAULT; + } + return ENXIO; +} + +static void +hn_cpuset_setthread_task(void *xmask, int pending __unused) +{ + cpuset_t *mask = xmask; + int error; + + error = cpuset_setthread(curthread->td_tid, mask); + if (error) { + panic("curthread=%ju: can't pin; error=%d", + (uintmax_t)curthread->td_tid, error); + } +} + +static int +hn_attach(device_t dev) +{ + struct hn_softc *sc = device_get_softc(dev); + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx; + uint8_t eaddr[ETHER_ADDR_LEN]; + struct ifnet *ifp = NULL; + int error, ring_cnt, tx_ring_cnt; + + sc->hn_dev = dev; + sc->hn_prichan = vmbus_get_channel(dev); + HN_LOCK_INIT(sc); + + /* + * Setup taskqueue for transmission. + */ + if (hn_tx_taskq == NULL) { + sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, + taskqueue_thread_enqueue, &sc->hn_tx_taskq); + taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", + device_get_nameunit(dev)); + if (hn_bind_tx_taskq >= 0) { + int cpu = hn_bind_tx_taskq; + struct task cpuset_task; + cpuset_t cpu_set; + + if (cpu > mp_ncpus - 1) + cpu = mp_ncpus - 1; + CPU_SETOF(cpu, &cpu_set); + TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, + &cpu_set); + taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task); + taskqueue_drain(sc->hn_tx_taskq, &cpuset_task); + } + } else { + sc->hn_tx_taskq = hn_tx_taskq; + } + + /* + * Setup taskqueue for mangement tasks, e.g. link status. + */ + sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK, + taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0); + taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt", + device_get_nameunit(dev)); + TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc); + TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc); + TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, + hn_netchg_status_taskfunc, sc); + + /* + * Allocate ifnet and setup its name earlier, so that if_printf + * can be used by functions, which will be called after + * ether_ifattach(). + */ + ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER); + ifp->if_softc = sc; + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + + /* + * Initialize ifmedia earlier so that it can be unconditionally + * destroyed, if error happened later on. + */ + ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); + + /* + * Figure out the # of RX rings (ring_cnt) and the # of TX rings + * to use (tx_ring_cnt). + * + * NOTE: + * The # of RX rings to use is same as the # of channels to use. + */ + ring_cnt = hn_chan_cnt; + if (ring_cnt <= 0) { + /* Default */ + ring_cnt = mp_ncpus; + if (ring_cnt > HN_RING_CNT_DEF_MAX) + ring_cnt = HN_RING_CNT_DEF_MAX; + } else if (ring_cnt > mp_ncpus) { + ring_cnt = mp_ncpus; + } + + tx_ring_cnt = hn_tx_ring_cnt; + if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) + tx_ring_cnt = ring_cnt; + if (hn_use_if_start) { + /* ifnet.if_start only needs one TX ring. */ + tx_ring_cnt = 1; + } + + /* + * Set the leader CPU for channels. + */ + sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; + + /* + * Create enough TX/RX rings, even if only limited number of + * channels can be allocated. + */ + error = hn_create_tx_data(sc, tx_ring_cnt); + if (error) + goto failed; + error = hn_create_rx_data(sc, ring_cnt); + if (error) + goto failed; + + /* + * Create transaction context for NVS and RNDIS transactions. + */ + sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), + HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); + if (sc->hn_xact == NULL) + goto failed; + + /* + * Attach the synthetic parts, i.e. NVS and RNDIS. + */ + error = hn_synth_attach(sc, ETHERMTU); + if (error) + goto failed; + + error = hn_rndis_get_eaddr(sc, eaddr); + if (error) + goto failed; + +#if __FreeBSD_version >= 1100099 + if (sc->hn_rx_ring_inuse > 1) { + /* + * Reduce TCP segment aggregation limit for multiple + * RX rings to increase ACK timeliness. + */ + hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); + } +#endif + + /* + * Fixup TX stuffs after synthetic parts are attached. + */ + hn_fixup_tx_data(sc); + + ctx = device_get_sysctl_ctx(dev); + child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, + &sc->hn_nvs_ver, 0, "NVS version"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, + hn_ndis_version_sysctl, "A", "NDIS version"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, + hn_caps_sysctl, "A", "capabilities"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, + hn_hwassist_sysctl, "A", "hwassist"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, + hn_rxfilter_sysctl, "A", "rxfilter"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, + hn_rss_hash_sysctl, "A", "RSS hash"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size", + CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key", + CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, + hn_rss_key_sysctl, "IU", "RSS key"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind", + CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, + hn_rss_ind_sysctl, "IU", "RSS indirect table"); + + /* + * Setup the ifmedia, which has been initialized earlier. + */ + ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); + /* XXX ifmedia_set really should do this for us */ + sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; + + /* + * Setup the ifnet for this interface. + */ + +#ifdef __LP64__ + ifp->if_baudrate = IF_Gbps(10); +#else + /* if_baudrate is 32bits on 32bit system. */ + ifp->if_baudrate = IF_Gbps(1); +#endif + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_ioctl = hn_ioctl; + ifp->if_init = hn_init; + if (hn_use_if_start) { + int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); + + ifp->if_start = hn_start; + IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); + ifp->if_snd.ifq_drv_maxlen = qdepth - 1; + IFQ_SET_READY(&ifp->if_snd); + } else { + ifp->if_transmit = hn_transmit; + ifp->if_qflush = hn_xmit_qflush; + } + + ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO; +#ifdef foo + /* We can't diff IPv6 packets from IPv4 packets on RX path. */ + ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; +#endif + if (sc->hn_caps & HN_CAP_VLAN) { + /* XXX not sure about VLAN_MTU. */ + ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; + } + + ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist; + if (ifp->if_hwassist & HN_CSUM_IP_MASK) + ifp->if_capabilities |= IFCAP_TXCSUM; + if (ifp->if_hwassist & HN_CSUM_IP6_MASK) + ifp->if_capabilities |= IFCAP_TXCSUM_IPV6; + if (sc->hn_caps & HN_CAP_TSO4) { + ifp->if_capabilities |= IFCAP_TSO4; + ifp->if_hwassist |= CSUM_IP_TSO; + } + if (sc->hn_caps & HN_CAP_TSO6) { + ifp->if_capabilities |= IFCAP_TSO6; + ifp->if_hwassist |= CSUM_IP6_TSO; + } + + /* Enable all available capabilities by default. */ + ifp->if_capenable = ifp->if_capabilities; + + if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { + hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); + ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; + ifp->if_hw_tsomaxsegsize = PAGE_SIZE; + } + + ether_ifattach(ifp, eaddr); + + if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) { + if_printf(ifp, "TSO segcnt %u segsz %u\n", + ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); + } + + /* Inform the upper layer about the long frame support. */ + ifp->if_hdrlen = sizeof(struct ether_vlan_header); + + /* + * Kick off link status check. + */ + sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; + hn_update_link_status(sc); + + return (0); +failed: + if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) + hn_synth_detach(sc); + hn_detach(dev); + return (error); +} + +static int +hn_detach(device_t dev) +{ + struct hn_softc *sc = device_get_softc(dev); + struct ifnet *ifp = sc->hn_ifp; + + if (device_is_attached(dev)) { + HN_LOCK(sc); + if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { + if (ifp->if_drv_flags & IFF_DRV_RUNNING) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201611110725.uAB7PEvt087134>