Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 17 Aug 2017 03:48:51 +0000 (UTC)
From:      Sepherosa Ziehau <sephe@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r322605 - stable/11/sys/dev/hyperv/netvsc
Message-ID:  <201708170348.v7H3mpWN099609@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: sephe
Date: Thu Aug 17 03:48:50 2017
New Revision: 322605
URL: https://svnweb.freebsd.org/changeset/base/322605

Log:
  MFC 322483,322485-322487
  
  322483
      hyperv/hn: Update VF's ibytes properly under transparent VF mode.
  
      While, I'm here add comment about why updating VF's imcast stat is
      not necessary.
  
      Sponsored by:   Microsoft
      Differential Revision:  https://reviews.freebsd.org/D11948
  
  322485
      hyperv/hn: Fix/enhance receiving path when VF is activated.
  
      - Update hn(4)'s stats properly for non-transparent mode VF.
      - Allow BPF tapping to hn(4) for non-transparent mode VF.
      - Don't setup mbuf hash, if 'options RSS' is set.
        In Azure, when VF is activated, TCP SYN and SYN|ACK go through hn(4)
        while the rest of segments and ACKs belonging to the same TCP 4-tuple
        go through the VF.  So don't setup mbuf hash, if a VF is activated
        and 'options RSS' is not enabled.  hn(4) and the VF may use neither
        the same RSS hash key nor the same RSS hash function, so the hash
        value for packets belonging to the same flow could be different!
      - Disable LRO.
        hn(4) will only receive broadcast packets, multicast packets, TCP
        SYN and SYN|ACK (in Azure), LRO is useless for these packet types.
        For non-transparent, we definitely _cannot_ enable LRO at all, since
        the LRO flush will use hn(4) as the receiving interface; i.e.
        hn_ifp->if_input(hn_ifp, m).
  
      While I'm here, remove unapplied comment and minor style change.
  
      Sponsored by:   Microsoft
      Differential Revision:  https://reviews.freebsd.org/D11978
  
  322486
      hyperv/hn: Minor cleanup
  
      Sponsored by:   Microsoft
      Differential Revision:  https://reviews.freebsd.org/D11979
  
  322487
      hyperv/hn: Re-set datapath after synthetic parts reattached.
  
      Do this even for non-transparent mode VF. Better safe than sorry.
  
      Sponsored by:   Microsoft
      Differential Revision:  https://reviews.freebsd.org/D11981

Modified:
  stable/11/sys/dev/hyperv/netvsc/if_hn.c
  stable/11/sys/dev/hyperv/netvsc/if_hnvar.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/dev/hyperv/netvsc/if_hn.c
==============================================================================
--- stable/11/sys/dev/hyperv/netvsc/if_hn.c	Thu Aug 17 01:31:37 2017	(r322604)
+++ stable/11/sys/dev/hyperv/netvsc/if_hn.c	Thu Aug 17 03:48:50 2017	(r322605)
@@ -282,6 +282,8 @@ static bool			hn_xpnt_vf_isready(struct hn_softc *);
 static void			hn_xpnt_vf_setready(struct hn_softc *);
 static void			hn_xpnt_vf_init_taskfunc(void *, int);
 static void			hn_xpnt_vf_init(struct hn_softc *);
+static void			hn_xpnt_vf_setenable(struct hn_softc *);
+static void			hn_xpnt_vf_setdisable(struct hn_softc *, bool);
 
 static int			hn_rndis_rxinfo(const void *, int,
 				    struct hn_rxinfo *);
@@ -578,6 +580,12 @@ hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
 };
 #endif	/* !RSS */
 
+static const struct hyperv_guid	hn_guid = {
+	.hv_guid = {
+	    0x63, 0x51, 0x61, 0xf8, 0x3e, 0xdf, 0xc5, 0x46,
+	    0x91, 0x3f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e }
+};
+
 static device_method_t hn_methods[] = {
 	/* Device interface */
 	DEVMETHOD(device_probe,		hn_probe),
@@ -1266,16 +1274,37 @@ hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m)
 	rm_runlock(&hn_vfmap_lock, &pt);
 
 	if (hn_ifp != NULL) {
-		/*
-		 * Fix up rcvif and go through hn(4)'s if_input and 
-		 * increase ipackets.
-		 */
 		for (mn = m; mn != NULL; mn = mn->m_nextpkt) {
-			/* Allow tapping on the VF. */
+			/*
+			 * Allow tapping on the VF.
+			 */
 			ETHER_BPF_MTAP(vf_ifp, mn);
+
+			/*
+			 * Update VF stats.
+			 */
+			if ((vf_ifp->if_capenable & IFCAP_HWSTATS) == 0) {
+				if_inc_counter(vf_ifp, IFCOUNTER_IBYTES,
+				    mn->m_pkthdr.len);
+			}
+			/*
+			 * XXX IFCOUNTER_IMCAST
+			 * This stat updating is kinda invasive, since it
+			 * requires two checks on the mbuf: the length check
+			 * and the ethernet header check.  As of this write,
+			 * all multicast packets go directly to hn(4), which
+			 * makes imcast stat updating in the VF a try in vian.
+			 */
+
+			/*
+			 * Fix up rcvif and increase hn(4)'s ipackets.
+			 */
 			mn->m_pkthdr.rcvif = hn_ifp;
 			if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
 		}
+		/*
+		 * Go through hn(4)'s if_input.
+		 */
 		hn_ifp->if_input(hn_ifp, m);
 	} else {
 		/*
@@ -1406,6 +1435,40 @@ hn_xpnt_vf_isready(struct hn_softc *sc)
 }
 
 static void
+hn_xpnt_vf_setenable(struct hn_softc *sc)
+{
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+	rm_wlock(&sc->hn_vf_lock);
+	sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
+	rm_wunlock(&sc->hn_vf_lock);
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+		sc->hn_rx_ring[i].hn_rx_flags |= HN_RX_FLAG_XPNT_VF;
+}
+
+static void
+hn_xpnt_vf_setdisable(struct hn_softc *sc, bool clear_vf)
+{
+	int i;
+
+	HN_LOCK_ASSERT(sc);
+
+	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+	rm_wlock(&sc->hn_vf_lock);
+	sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
+	if (clear_vf)
+		sc->hn_vf_ifp = NULL;
+	rm_wunlock(&sc->hn_vf_lock);
+
+	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
+		sc->hn_rx_ring[i].hn_rx_flags &= ~HN_RX_FLAG_XPNT_VF;
+}
+
+static void
 hn_xpnt_vf_init(struct hn_softc *sc)
 {
 	int error;
@@ -1438,10 +1501,8 @@ hn_xpnt_vf_init(struct hn_softc *sc)
 	 */
 	hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
 
-	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
-	rm_wlock(&sc->hn_vf_lock);
-	sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
-	rm_wunlock(&sc->hn_vf_lock);
+	/* Mark transparent mode VF as enabled. */
+	hn_xpnt_vf_setenable(sc);
 }
 
 static void
@@ -1627,11 +1688,8 @@ hn_ifnet_detevent(void *xsc, struct ifnet *ifp)
 		hn_resume_mgmt(sc);
 	}
 
-	/* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
-	rm_wlock(&sc->hn_vf_lock);
-	sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
-	sc->hn_vf_ifp = NULL;
-	rm_wunlock(&sc->hn_vf_lock);
+	/* Mark transparent mode VF as disabled. */
+	hn_xpnt_vf_setdisable(sc, true /* clear hn_vf_ifp */);
 
 	rm_wlock(&hn_vfmap_lock);
 
@@ -1659,18 +1717,11 @@ hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int li
 		if_link_state_change(sc->hn_ifp, link_state);
 }
 
-/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
-static const struct hyperv_guid g_net_vsc_device_type = {
-	.hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
-		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
-};
-
 static int
 hn_probe(device_t dev)
 {
 
-	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
-	    &g_net_vsc_device_type) == 0) {
+	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &hn_guid) == 0) {
 		device_set_desc(dev, "Hyper-V Network Interface");
 		return BUS_PROBE_DEFAULT;
 	}
@@ -2973,13 +3024,16 @@ static int
 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
     const struct hn_rxinfo *info)
 {
-	struct ifnet *ifp;
+	struct ifnet *ifp, *hn_ifp = rxr->hn_ifp;
 	struct mbuf *m_new;
 	int size, do_lro = 0, do_csum = 1;
 	int hash_type;
 
-	/* If the VF is active, inject the packet through the VF */
-	ifp = rxr->hn_rxvf_ifp ? rxr->hn_rxvf_ifp : rxr->hn_ifp;
+	/*
+	 * If the non-transparent mode VF is active, inject this packet
+	 * into the VF.
+	 */
+	ifp = rxr->hn_rxvf_ifp ? rxr->hn_rxvf_ifp : hn_ifp;
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		/*
@@ -2993,10 +3047,15 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 		return (0);
 	}
 
+	if (__predict_false(dlen < ETHER_HDR_LEN)) {
+		if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1);
+		return (0);
+	}
+
 	if (dlen <= MHLEN) {
 		m_new = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m_new == NULL) {
-			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+			if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
 			return (0);
 		}
 		memcpy(mtod(m_new, void *), data, dlen);
@@ -3017,7 +3076,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 
 		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
 		if (m_new == NULL) {
-			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
+			if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
 			return (0);
 		}
 
@@ -3025,7 +3084,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 	}
 	m_new->m_pkthdr.rcvif = ifp;
 
-	if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
+	if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0))
 		do_csum = 0;
 
 	/* receive side checksum offload */
@@ -3066,8 +3125,9 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int
 		int hoff;
 
 		hoff = sizeof(*eh);
-		if (m_new->m_len < hoff)
-			goto skip;
+		/* Checked at the beginning of this function. */
+		KASSERT(m_new->m_len >= hoff, ("not ethernet frame"));
+
 		eh = mtod(m_new, struct ether_header *);
 		etype = ntohs(eh->ether_type);
 		if (etype == ETHERTYPE_VLAN) {
@@ -3122,6 +3182,37 @@ skip:
 		m_new->m_flags |= M_VLANTAG;
 	}
 
+	/*
+	 * If VF is activated (tranparent/non-transparent mode does not
+	 * matter here).
+	 *
+	 * - Don't setup mbuf hash, if 'options RSS' is set.
+	 *
+	 *   In Azure, when VF is activated, TCP SYN and SYN|ACK go
+	 *   through hn(4) while the rest of segments and ACKs belonging
+	 *   to the same TCP 4-tuple go through the VF.  So don't setup
+	 *   mbuf hash, if a VF is activated and 'options RSS' is not
+	 *   enabled.  hn(4) and the VF may use neither the same RSS
+	 *   hash key nor the same RSS hash function, so the hash value
+	 *   for packets belonging to the same flow could be different!
+	 *
+	 * - Disable LRO
+	 *
+	 *   hn(4) will only receive broadcast packets, multicast packets,
+	 *   TCP SYN and SYN|ACK (in Azure), LRO is useless for these
+	 *   packet types.
+	 *
+	 *   For non-transparent, we definitely _cannot_ enable LRO at
+	 *   all, since the LRO flush will use hn(4) as the receiving
+	 *   interface; i.e. hn_ifp->if_input(hn_ifp, m).
+	 */
+	if (hn_ifp != ifp || (rxr->hn_rx_flags & HN_RX_FLAG_XPNT_VF)) {
+		do_lro = 0;	/* disable LRO. */
+#ifndef RSS
+		goto skip_hash;	/* skip mbuf hash setup */
+#endif
+	}
+
 	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
 		rxr->hn_rss_pkts++;
 		m_new->m_pkthdr.flowid = info->hash_value;
@@ -3171,15 +3262,36 @@ skip:
 	}
 	M_HASHTYPE_SET(m_new, hash_type);
 
-	/*
-	 * Note:  Moved RX completion back to hv_nv_on_receive() so all
-	 * messages (not just data messages) will trigger a response.
-	 */
-
+#ifndef RSS
+skip_hash:
+#endif
 	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	if (hn_ifp != ifp) {
+		const struct ether_header *eh;
+
+		/*
+		 * Non-transparent mode VF is activated.
+		 */
+
+		/*
+		 * Allow tapping on hn(4).
+		 */
+		ETHER_BPF_MTAP(hn_ifp, m_new);
+
+		/*
+		 * Update hn(4)'s stats.
+		 */
+		if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
+		if_inc_counter(hn_ifp, IFCOUNTER_IBYTES, m_new->m_pkthdr.len);
+		/* Checked at the beginning of this function. */
+		KASSERT(m_new->m_len >= ETHER_HDR_LEN, ("not ethernet frame"));
+		eh = mtod(m_new, struct ether_header *);
+		if (ETHER_IS_MULTICAST(eh->ether_dhost))
+			if_inc_counter(hn_ifp, IFCOUNTER_IMCASTS, 1);
+	}
 	rxr->hn_pkts++;
 
-	if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
+	if ((hn_ifp->if_capenable & IFCAP_LRO) && do_lro) {
 #if defined(INET) || defined(INET6)
 		struct lro_ctrl *lro = &rxr->hn_lro;
 
@@ -3192,10 +3304,8 @@ skip:
 		}
 #endif
 	}
+	ifp->if_input(ifp, m_new);
 
-	/* We're not holding the lock here, so don't release it */
-	(*ifp->if_input)(ifp, m_new);
-
 	return (0);
 }
 
@@ -3293,7 +3403,8 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 		 */
 		hn_resume(sc);
 
-		if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
+		if ((sc->hn_flags & HN_FLAG_RXVF) ||
+		    (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) {
 			/*
 			 * Since we have reattached the NVS part,
 			 * change the datapath to VF again; in case
@@ -3490,10 +3601,8 @@ hn_stop(struct hn_softc *sc, bool detaching)
 		KASSERT(sc->hn_vf_ifp != NULL,
 		    ("%s: VF is not attached", ifp->if_xname));
 
-		/* NOTE: hn_vf_lock for hn_transmit() */
-		rm_wlock(&sc->hn_vf_lock);
-		sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
-		rm_wunlock(&sc->hn_vf_lock);
+		/* Mark transparent mode VF as disabled. */
+		hn_xpnt_vf_setdisable(sc, false /* keep hn_vf_ifp */);
 
 		/*
 		 * NOTE:

Modified: stable/11/sys/dev/hyperv/netvsc/if_hnvar.h
==============================================================================
--- stable/11/sys/dev/hyperv/netvsc/if_hnvar.h	Thu Aug 17 01:31:37 2017	(r322604)
+++ stable/11/sys/dev/hyperv/netvsc/if_hnvar.h	Thu Aug 17 03:48:50 2017	(r322605)
@@ -63,6 +63,7 @@ struct hn_rx_ring {
 	struct hn_tx_ring *hn_txr;
 	void		*hn_pktbuf;
 	int		hn_pktbuf_len;
+	int		hn_rx_flags;	/* HN_RX_FLAG_ */
 	uint8_t		*hn_rxbuf;	/* shadow sc->hn_rxbuf */
 	int		hn_rx_idx;
 
@@ -82,7 +83,6 @@ struct hn_rx_ring {
 
 	/* Rarely used stuffs */
 	struct sysctl_oid *hn_rx_sysctl_tree;
-	int		hn_rx_flags;
 
 	void		*hn_br;		/* TX/RX bufring */
 	struct hyperv_dma hn_br_dma;
@@ -96,6 +96,7 @@ struct hn_rx_ring {
 
 #define HN_RX_FLAG_ATTACHED	0x0001
 #define HN_RX_FLAG_BR_REF	0x0002
+#define HN_RX_FLAG_XPNT_VF	0x0004
 
 struct hn_tx_ring {
 #ifndef HN_USE_TXDESC_BUFRING



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201708170348.v7H3mpWN099609>