Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 29 Mar 2021 09:12:13 GMT
From:      Wei Hu <whu@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 4150446f6467 - stable/12 - Hyper-V: hn: Enable vSwitch RSC support in hn netvsc driver
Message-ID:  <202103290912.12T9CDTY052724@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/12 has been updated by whu:

URL: https://cgit.FreeBSD.org/src/commit/?id=4150446f6467f585595803db211f58152cd3587a

commit 4150446f6467f585595803db211f58152cd3587a
Author:     Wei Hu <whu@FreeBSD.org>
AuthorDate: 2021-03-12 04:35:16 +0000
Commit:     Wei Hu <whu@FreeBSD.org>
CommitDate: 2021-03-29 08:55:18 +0000

    Hyper-V: hn: Enable vSwitch RSC support in hn netvsc driver
    
    Receive Segment Coalescing (RSC) in the vSwitch is a feature available on
    Windows Server 2019 hosts and later. It reduces the per-packet processing
    overhead by coalescing multiple TCP segments when possible. This happens
    mostly when TCP traffic flows between different guests on the same host.
    This patch adds netvsc driver support for this feature.
    
    The patch also updates NVS version to 6.1 as needed for RSC
    enablement.
    
    MFC after:      2 weeks
    Sponsored by:   Microsoft
    Differential Revision:  https://reviews.freebsd.org/D29075
    
    (cherry picked from commit a491581f3f8df07cdff0236bd556895205929af4)
---
 sys/dev/hyperv/netvsc/hn_nvs.c   |   5 +
 sys/dev/hyperv/netvsc/hn_rndis.c |  25 ++++
 sys/dev/hyperv/netvsc/if_hn.c    | 258 ++++++++++++++++++++++++++++-----------
 sys/dev/hyperv/netvsc/if_hnreg.h |  14 +++
 sys/dev/hyperv/netvsc/if_hnvar.h |  19 ++-
 sys/dev/hyperv/netvsc/ndis.h     |  31 +++--
 sys/net/rndis.h                  |   7 +-
 7 files changed, 269 insertions(+), 90 deletions(-)

diff --git a/sys/dev/hyperv/netvsc/hn_nvs.c b/sys/dev/hyperv/netvsc/hn_nvs.c
index 73a112c4e5e1..4dbc28996617 100644
--- a/sys/dev/hyperv/netvsc/hn_nvs.c
+++ b/sys/dev/hyperv/netvsc/hn_nvs.c
@@ -80,6 +80,8 @@ struct hn_nvs_sendctx		hn_nvs_sendctx_none =
     HN_NVS_SENDCTX_INITIALIZER(hn_nvs_sent_none, NULL);
 
 static const uint32_t		hn_nvs_version[] = {
+	HN_NVS_VERSION_61,
+	HN_NVS_VERSION_6,
 	HN_NVS_VERSION_5,
 	HN_NVS_VERSION_4,
 	HN_NVS_VERSION_2,
@@ -508,6 +510,9 @@ hn_nvs_conf_ndis(struct hn_softc *sc, int mtu)
 	conf.nvs_caps = HN_NVS_NDIS_CONF_VLAN;
 	if (sc->hn_nvs_ver >= HN_NVS_VERSION_5)
 		conf.nvs_caps |= HN_NVS_NDIS_CONF_SRIOV;
+	if (sc->hn_nvs_ver >= HN_NVS_VERSION_61)
+		conf.nvs_caps |= HN_NVS_NDIS_CONF_RSC;
+
 
 	/* NOTE: No response. */
 	error = hn_nvs_req_send(sc, &conf, sizeof(conf));
diff --git a/sys/dev/hyperv/netvsc/hn_rndis.c b/sys/dev/hyperv/netvsc/hn_rndis.c
index b9bf683fe811..794a82cf3957 100644
--- a/sys/dev/hyperv/netvsc/hn_rndis.c
+++ b/sys/dev/hyperv/netvsc/hn_rndis.c
@@ -723,6 +723,17 @@ hn_rndis_conf_offload(struct hn_softc *sc, int mtu)
 			params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX;
 	}
 
+	/* RSC offload */
+	if (hwcaps.ndis_hdr.ndis_rev >= NDIS_OFFLOAD_PARAMS_REV_3) {
+		if (hwcaps.ndis_rsc.ndis_ip4 && hwcaps.ndis_rsc.ndis_ip6) {
+			params.ndis_rsc_ip4 = NDIS_OFFLOAD_RSC_ON;
+			params.ndis_rsc_ip6 = NDIS_OFFLOAD_RSC_ON;
+		} else {
+			params.ndis_rsc_ip4 = NDIS_OFFLOAD_RSC_OFF;
+			params.ndis_rsc_ip6 = NDIS_OFFLOAD_RSC_OFF;
+		}
+	}
+
 	if (bootverbose) {
 		if_printf(sc->hn_ifp, "offload csum: "
 		    "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n",
@@ -734,6 +745,10 @@ hn_rndis_conf_offload(struct hn_softc *sc, int mtu)
 		if_printf(sc->hn_ifp, "offload lsov2: ip4 %u, ip6 %u\n",
 		    params.ndis_lsov2_ip4,
 		    params.ndis_lsov2_ip6);
+		if (hwcaps.ndis_hdr.ndis_rev >= NDIS_OFFLOAD_PARAMS_REV_3)
+			if_printf(sc->hn_ifp, "offload rsc: ip4 %u, ip6 %u\n",
+			    params.ndis_rsc_ip4,
+			    params.ndis_rsc_ip6);
 	}
 
 	error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, &params, paramsz);
@@ -969,6 +984,11 @@ hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps)
 		if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n",
 		    caps->ndis_hdr.ndis_size);
 		return (EINVAL);
+	} else if (caps->ndis_hdr.ndis_rev >= NDIS_OFFLOAD_REV_3 &&
+		   caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE) {
+		if_printf(sc->hn_ifp, "invalid NDIS rev3 objsize %u\n",
+		    caps->ndis_hdr.ndis_size);
+		return (EINVAL);
 	}
 
 	if (bootverbose) {
@@ -1001,6 +1021,11 @@ hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps)
 		    caps->ndis_lsov2.ndis_ip6_minsg,
 		    caps->ndis_lsov2.ndis_ip6_encap,
 		    caps->ndis_lsov2.ndis_ip6_opts);
+		if (caps->ndis_hdr.ndis_rev >= NDIS_OFFLOAD_REV_3)
+			if_printf(sc->hn_ifp, "hwcaps rsc: "
+			    "ip4 %u ip6 %u\n",
+			    caps->ndis_rsc.ndis_ip4,
+			    caps->ndis_rsc.ndis_ip6);
 	}
 	return (0);
 }
diff --git a/sys/dev/hyperv/netvsc/if_hn.c b/sys/dev/hyperv/netvsc/if_hn.c
index b77f3118b777..1e90e8ef14fb 100644
--- a/sys/dev/hyperv/netvsc/if_hn.c
+++ b/sys/dev/hyperv/netvsc/if_hn.c
@@ -223,11 +223,25 @@ struct hn_txdesc {
 #define HN_TXD_FLAG_DMAMAP		0x0002
 #define HN_TXD_FLAG_ONAGG		0x0004
 
+#define	HN_NDIS_PKTINFO_SUBALLOC	0x01
+#define	HN_NDIS_PKTINFO_1ST_FRAG	0x02
+#define	HN_NDIS_PKTINFO_LAST_FRAG	0x04
+
+struct packet_info_id {
+	uint8_t				ver;
+	uint8_t				flag;
+	uint16_t			pkt_id;
+};
+
+#define NDIS_PKTINFOID_SZ		sizeof(struct packet_info_id)
+
+
 struct hn_rxinfo {
-	uint32_t			vlan_info;
-	uint32_t			csum_info;
-	uint32_t			hash_info;
-	uint32_t			hash_value;
+	const uint32_t			*vlan_info;
+	const uint32_t			*csum_info;
+	const uint32_t			*hash_info;
+	const uint32_t			*hash_value;
+	const struct packet_info_id	*pktinfo_id;
 };
 
 struct hn_rxvf_setarg {
@@ -239,15 +253,13 @@ struct hn_rxvf_setarg {
 #define HN_RXINFO_CSUM			0x0002
 #define HN_RXINFO_HASHINF		0x0004
 #define HN_RXINFO_HASHVAL		0x0008
+#define HN_RXINFO_PKTINFO_ID		0x0010
 #define HN_RXINFO_ALL			\
 	(HN_RXINFO_VLAN |		\
 	 HN_RXINFO_CSUM |		\
 	 HN_RXINFO_HASHINF |		\
-	 HN_RXINFO_HASHVAL)
-
-#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
-#define HN_NDIS_RXCSUM_INFO_INVALID	0
-#define HN_NDIS_HASH_INFO_INVALID	0
+	 HN_RXINFO_HASHVAL |		\
+	 HN_RXINFO_PKTINFO_ID)
 
 static int			hn_probe(device_t);
 static int			hn_attach(device_t);
@@ -396,8 +408,7 @@ static int			hn_rxfilter_config(struct hn_softc *);
 static int			hn_rss_reconfig(struct hn_softc *);
 static void			hn_rss_ind_fixup(struct hn_softc *);
 static void			hn_rss_mbuf_hash(struct hn_softc *, uint32_t);
-static int			hn_rxpkt(struct hn_rx_ring *, const void *,
-				    int, const struct hn_rxinfo *);
+static int			hn_rxpkt(struct hn_rx_ring *);
 static uint32_t			hn_rss_type_fromndis(uint32_t);
 static uint32_t			hn_rss_type_tondis(uint32_t);
 
@@ -3349,9 +3360,10 @@ again:
  * allocated with cluster size MJUMPAGESIZE, and filled
  * accordingly.
  *
- * Return 1 if able to complete the job; otherwise 0.
+ * Return the last mbuf in the chain or NULL if failed to
+ * allocate new mbuf.
  */
-static int
+static struct mbuf *
 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
 {
 	struct mbuf *m, *n;
@@ -3379,7 +3391,7 @@ hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
 		 */
 		n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE);
 		if (n == NULL)
-			break;
+			return NULL;
 		n->m_len = min(MJUMPAGESIZE, remainder);
 		bcopy(cp, mtod(n, caddr_t), n->m_len);
 		cp += n->m_len;
@@ -3387,10 +3399,8 @@ hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
 		m->m_next = n;
 		m = n;
 	}
-	if (m0->m_flags & M_PKTHDR)
-		m0->m_pkthdr.len += len - remainder;
 
-	return (remainder == 0);
+	return m;
 }
 
 #if defined(INET) || defined(INET6)
@@ -3408,14 +3418,14 @@ hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
 #endif
 
 static int
-hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
-    const struct hn_rxinfo *info)
+hn_rxpkt(struct hn_rx_ring *rxr)
 {
 	struct ifnet *ifp, *hn_ifp = rxr->hn_ifp;
-	struct mbuf *m_new;
+	struct mbuf *m_new, *n;
 	int size, do_lro = 0, do_csum = 1, is_vf = 0;
 	int hash_type = M_HASHTYPE_NONE;
 	int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE;
+	int i;
 
 	ifp = hn_ifp;
 	if (rxr->hn_rxvf_ifp != NULL) {
@@ -3442,20 +3452,20 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
 		return (0);
 	}
 
-	if (__predict_false(dlen < ETHER_HDR_LEN)) {
+	if (__predict_false(rxr->rsc.pktlen < ETHER_HDR_LEN)) {
 		if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1);
 		return (0);
 	}
 
-	if (dlen <= MHLEN) {
+	if (rxr->rsc.cnt == 1 && rxr->rsc.pktlen <= MHLEN) {
 		m_new = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m_new == NULL) {
 			if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
 			return (0);
 		}
-		memcpy(mtod(m_new, void *), data, dlen);
-		m_new->m_pkthdr.len = m_new->m_len = dlen;
-		rxr->hn_small_pkts++;
+		memcpy(mtod(m_new, void *), rxr->rsc.frag_data[0],
+		    rxr->rsc.frag_len[0]);
+		m_new->m_pkthdr.len = m_new->m_len = rxr->rsc.frag_len[0];
 	} else {
 		/*
 		 * Get an mbuf with a cluster.  For packets 2K or less,
@@ -3464,7 +3474,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
 		 * if looped around to the Hyper-V TX channel, so avoid them.
 		 */
 		size = MCLBYTES;
-		if (dlen > MCLBYTES) {
+		if (rxr->rsc.pktlen > MCLBYTES) {
 			/* 4096 */
 			size = MJUMPAGESIZE;
 		}
@@ -3475,29 +3485,42 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
 			return (0);
 		}
 
-		hv_m_append(m_new, dlen, data);
+		n = m_new;
+		for (i = 0; i < rxr->rsc.cnt; i++) {
+			n = hv_m_append(n, rxr->rsc.frag_len[i],
+			    rxr->rsc.frag_data[i]);
+			if (n == NULL) {
+				if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1);
+				return (0);
+			} else {
+				m_new->m_pkthdr.len += rxr->rsc.frag_len[i];
+			}
+		}
 	}
+	if (rxr->rsc.pktlen <= MHLEN)
+		rxr->hn_small_pkts++;
+
 	m_new->m_pkthdr.rcvif = ifp;
 
 	if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0))
 		do_csum = 0;
 
 	/* receive side checksum offload */
-	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
+	if (rxr->rsc.csum_info != NULL) {
 		/* IP csum offload */
-		if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
+		if ((*(rxr->rsc.csum_info) & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
 			m_new->m_pkthdr.csum_flags |=
 			    (CSUM_IP_CHECKED | CSUM_IP_VALID);
 			rxr->hn_csum_ip++;
 		}
 
 		/* TCP/UDP csum offload */
-		if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
+		if ((*(rxr->rsc.csum_info) & (NDIS_RXCSUM_INFO_UDPCS_OK |
 		     NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
 			m_new->m_pkthdr.csum_flags |=
 			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 			m_new->m_pkthdr.csum_data = 0xffff;
-			if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
+			if (*(rxr->rsc.csum_info) & NDIS_RXCSUM_INFO_TCPCS_OK)
 				rxr->hn_csum_tcp++;
 			else
 				rxr->hn_csum_udp++;
@@ -3510,7 +3533,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
 		 * the do_lro setting here is actually _not_ accurate.  We
 		 * depend on the RSS hash type check to reset do_lro.
 		 */
-		if ((info->csum_info &
+		if ((*(rxr->rsc.csum_info) &
 		     (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
 		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
 			do_lro = 1;
@@ -3547,11 +3570,11 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
 		}
 	}
 
-	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
+	if (rxr->rsc.vlan_info != NULL) {
 		m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
-		    NDIS_VLAN_INFO_ID(info->vlan_info),
-		    NDIS_VLAN_INFO_PRI(info->vlan_info),
-		    NDIS_VLAN_INFO_CFI(info->vlan_info));
+		    NDIS_VLAN_INFO_ID(*(rxr->rsc.vlan_info)),
+		    NDIS_VLAN_INFO_PRI(*(rxr->rsc.vlan_info)),
+		    NDIS_VLAN_INFO_CFI(*(rxr->rsc.vlan_info)));
 		m_new->m_flags |= M_VLANTAG;
 	}
 
@@ -3577,14 +3600,14 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
 	 * matter here), do _not_ mess with unsupported hash types or
 	 * functions.
 	 */
-	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
+	if (rxr->rsc.hash_info != NULL) {
 		rxr->hn_rss_pkts++;
-		m_new->m_pkthdr.flowid = info->hash_value;
+		m_new->m_pkthdr.flowid = *(rxr->rsc.hash_value);
 		if (!is_vf)
 			hash_type = M_HASHTYPE_OPAQUE_HASH;
-		if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
+		if ((*(rxr->rsc.hash_info) & NDIS_HASH_FUNCTION_MASK) ==
 		    NDIS_HASH_FUNCTION_TOEPLITZ) {
-			uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK &
+			uint32_t type = (*(rxr->rsc.hash_info) & NDIS_HASH_TYPE_MASK &
 			    rxr->hn_mbuf_hash);
 
 			/*
@@ -5029,6 +5052,16 @@ hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
 				    OID_AUTO, "rss_pkts", CTLFLAG_RW,
 				    &rxr->hn_rss_pkts,
 				    "# of packets w/ RSS info received");
+				SYSCTL_ADD_ULONG(ctx,
+				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+				    OID_AUTO, "rsc_pkts", CTLFLAG_RW,
+				    &rxr->hn_rsc_pkts,
+				    "# of RSC packets received");
+				SYSCTL_ADD_ULONG(ctx,
+				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
+				    OID_AUTO, "rsc_drop", CTLFLAG_RW,
+				    &rxr->hn_rsc_drop,
+				    "# of RSC fragments dropped");
 				SYSCTL_ADD_INT(ctx,
 				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
 				    OID_AUTO, "pktbuf_len", CTLFLAG_RD,
@@ -7053,37 +7086,56 @@ hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
 		dlen = pi->rm_size - pi->rm_pktinfooffset;
 		data = pi->rm_data;
 
-		switch (pi->rm_type) {
-		case NDIS_PKTINFO_TYPE_VLAN:
-			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
-				return (EINVAL);
-			info->vlan_info = *((const uint32_t *)data);
-			mask |= HN_RXINFO_VLAN;
-			break;
+		if (pi->rm_internal == 1) {
+			switch (pi->rm_type) {
+			case NDIS_PKTINFO_IT_PKTINFO_ID:
+				if (__predict_false(dlen < NDIS_PKTINFOID_SZ))
+					return (EINVAL);
+				info->pktinfo_id =
+				    (const struct packet_info_id *)data;
+				mask |= HN_RXINFO_PKTINFO_ID;
+				break;
 
-		case NDIS_PKTINFO_TYPE_CSUM:
-			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
-				return (EINVAL);
-			info->csum_info = *((const uint32_t *)data);
-			mask |= HN_RXINFO_CSUM;
-			break;
+			default:
+				goto next;
+			}
+		} else {
+			switch (pi->rm_type) {
+			case NDIS_PKTINFO_TYPE_VLAN:
+				if (__predict_false(dlen
+				    < NDIS_VLAN_INFO_SIZE))
+					return (EINVAL);
+				info->vlan_info = (const uint32_t *)data;
+				mask |= HN_RXINFO_VLAN;
+				break;
 
-		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
-			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
-				return (EINVAL);
-			info->hash_value = *((const uint32_t *)data);
-			mask |= HN_RXINFO_HASHVAL;
-			break;
+			case NDIS_PKTINFO_TYPE_CSUM:
+				if (__predict_false(dlen
+				    < NDIS_RXCSUM_INFO_SIZE))
+					return (EINVAL);
+				info->csum_info = (const uint32_t *)data;
+				mask |= HN_RXINFO_CSUM;
+				break;
 
-		case HN_NDIS_PKTINFO_TYPE_HASHINF:
-			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
-				return (EINVAL);
-			info->hash_info = *((const uint32_t *)data);
-			mask |= HN_RXINFO_HASHINF;
-			break;
+			case HN_NDIS_PKTINFO_TYPE_HASHVAL:
+				if (__predict_false(dlen
+				    < HN_NDIS_HASH_VALUE_SIZE))
+					return (EINVAL);
+				info->hash_value = (const uint32_t *)data;
+				mask |= HN_RXINFO_HASHVAL;
+				break;
 
-		default:
-			goto next;
+			case HN_NDIS_PKTINFO_TYPE_HASHINF:
+				if (__predict_false(dlen
+				    < HN_NDIS_HASH_INFO_SIZE))
+					return (EINVAL);
+				info->hash_info = (const uint32_t *)data;
+				mask |= HN_RXINFO_HASHINF;
+				break;
+
+			default:
+				goto next;
+			}
 		}
 
 		if (mask == HN_RXINFO_ALL) {
@@ -7100,7 +7152,7 @@ next:
 	 * - If there is no hash value, invalidate the hash info.
 	 */
 	if ((mask & HN_RXINFO_HASHVAL) == 0)
-		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
+		info->hash_info = NULL;
 	return (0);
 }
 
@@ -7118,12 +7170,34 @@ hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
 	return (true);
 }
 
+static __inline void
+hn_rsc_add_data(struct hn_rx_ring *rxr, const void *data,
+		uint32_t len, struct hn_rxinfo *info)
+{
+	uint32_t cnt = rxr->rsc.cnt;
+
+	if (cnt) {
+		rxr->rsc.pktlen += len;
+	} else {
+		rxr->rsc.vlan_info = info->vlan_info;
+		rxr->rsc.csum_info = info->csum_info;
+		rxr->rsc.hash_info = info->hash_info;
+		rxr->rsc.hash_value = info->hash_value;
+		rxr->rsc.pktlen = len;
+	}
+
+	rxr->rsc.frag_data[cnt] = data;
+	rxr->rsc.frag_len[cnt] = len;
+	rxr->rsc.cnt++;
+}
+
 static void
 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
 {
 	const struct rndis_packet_msg *pkt;
 	struct hn_rxinfo info;
 	int data_off, pktinfo_off, data_len, pktinfo_len;
+	bool rsc_more= false;
 
 	/*
 	 * Check length.
@@ -7231,9 +7305,11 @@ hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
 	/*
 	 * Check per-packet-info coverage and find useful per-packet-info.
 	 */
-	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
-	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
-	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
+	info.vlan_info = NULL;
+	info.csum_info = NULL;
+	info.hash_info = NULL;
+	info.pktinfo_id = NULL;
+
 	if (__predict_true(pktinfo_len != 0)) {
 		bool overlap;
 		int error;
@@ -7277,7 +7353,43 @@ hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
 		    pkt->rm_len, data_off, data_len);
 		return;
 	}
-	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
+
+	/* Identify RSC fragments, drop invalid packets */
+	if ((info.pktinfo_id != NULL) &&
+	    (info.pktinfo_id->flag & HN_NDIS_PKTINFO_SUBALLOC)) {
+		if (info.pktinfo_id->flag & HN_NDIS_PKTINFO_1ST_FRAG) {
+			rxr->rsc.cnt = 0;
+			rxr->hn_rsc_pkts++;
+		} else if (rxr->rsc.cnt == 0)
+			goto drop;
+
+		rsc_more = true;
+
+		if (info.pktinfo_id->flag & HN_NDIS_PKTINFO_LAST_FRAG)
+			rsc_more = false;
+
+		if (rsc_more && rxr->rsc.is_last)
+			goto drop;
+	} else {
+		rxr->rsc.cnt = 0;
+	}
+
+	if (__predict_false(rxr->rsc.cnt >= HN_NVS_RSC_MAX))
+		goto drop;
+
+	/* Store data in per rx ring structure */
+	hn_rsc_add_data(rxr,((const uint8_t *)pkt) + data_off,
+	    data_len, &info);
+
+	if (rsc_more)
+		return;
+
+	hn_rxpkt(rxr);
+	rxr->rsc.cnt = 0;
+	return;
+drop:
+	rxr->hn_rsc_drop++;
+	return;
 }
 
 static __inline void
@@ -7390,6 +7502,8 @@ hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
 			    "ofs %d, len %d\n", i, ofs, len);
 			continue;
 		}
+
+		rxr->rsc.is_last = (i == (count - 1));
 		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
 	}
 
diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h
index 154d69264679..54db556cc56d 100644
--- a/sys/dev/hyperv/netvsc/if_hnreg.h
+++ b/sys/dev/hyperv/netvsc/if_hnreg.h
@@ -48,6 +48,8 @@
 #define HN_NVS_VERSION_2		0x30002
 #define HN_NVS_VERSION_4		0x40000
 #define HN_NVS_VERSION_5		0x50000
+#define HN_NVS_VERSION_6		0x60000
+#define HN_NVS_VERSION_61		0x60001
 
 #define HN_NVS_RXBUF_SIG		0xcafe
 #define HN_NVS_CHIM_SIG			0xface
@@ -101,6 +103,7 @@ struct hn_nvs_init {
 	uint32_t	nvs_ver_min;
 	uint32_t	nvs_ver_max;
 	uint8_t		nvs_rsvd[20];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_init) >= HN_NVS_REQSIZE_MIN);
 
@@ -118,11 +121,13 @@ struct hn_nvs_ndis_conf {
 	uint32_t	nvs_rsvd;
 	uint64_t	nvs_caps;	/* HN_NVS_NDIS_CONF_ */
 	uint8_t		nvs_rsvd1[12];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_ndis_conf) >= HN_NVS_REQSIZE_MIN);
 
 #define HN_NVS_NDIS_CONF_SRIOV		0x0004
 #define HN_NVS_NDIS_CONF_VLAN		0x0008
+#define HN_NVS_NDIS_CONF_RSC		0x0080
 
 /* No response */
 struct hn_nvs_ndis_init {
@@ -130,6 +135,7 @@ struct hn_nvs_ndis_init {
 	uint32_t	nvs_ndis_major;	/* NDIS_VERSION_MAJOR_ */
 	uint32_t	nvs_ndis_minor;	/* NDIS_VERSION_MINOR_ */
 	uint8_t		nvs_rsvd[20];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_ndis_init) >= HN_NVS_REQSIZE_MIN);
 
@@ -141,6 +147,7 @@ struct hn_nvs_datapath {
 	uint32_t	nvs_type;	/* HN_NVS_TYPE_SET_DATAPATH */
 	uint32_t	nvs_active_path;/* HN_NVS_DATAPATH_* */
 	uint32_t	nvs_rsvd[6];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_datapath) >= HN_NVS_REQSIZE_MIN);
 
@@ -149,6 +156,7 @@ struct hn_nvs_rxbuf_conn {
 	uint32_t	nvs_gpadl;	/* RXBUF vmbus GPADL */
 	uint16_t	nvs_sig;	/* HN_NVS_RXBUF_SIG */
 	uint8_t		nvs_rsvd[22];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_rxbuf_conn) >= HN_NVS_REQSIZE_MIN);
 
@@ -171,6 +179,7 @@ struct hn_nvs_rxbuf_disconn {
 	uint32_t	nvs_type;	/* HN_NVS_TYPE_RXBUF_DISCONN */
 	uint16_t	nvs_sig;	/* HN_NVS_RXBUF_SIG */
 	uint8_t		nvs_rsvd[26];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_rxbuf_disconn) >= HN_NVS_REQSIZE_MIN);
 
@@ -179,6 +188,7 @@ struct hn_nvs_chim_conn {
 	uint32_t	nvs_gpadl;	/* chimney buf vmbus GPADL */
 	uint16_t	nvs_sig;	/* NDIS_NVS_CHIM_SIG */
 	uint8_t		nvs_rsvd[22];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_chim_conn) >= HN_NVS_REQSIZE_MIN);
 
@@ -193,6 +203,7 @@ struct hn_nvs_chim_disconn {
 	uint32_t	nvs_type;	/* HN_NVS_TYPE_CHIM_DISCONN */
 	uint16_t	nvs_sig;	/* HN_NVS_CHIM_SIG */
 	uint8_t		nvs_rsvd[26];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_chim_disconn) >= HN_NVS_REQSIZE_MIN);
 
@@ -203,6 +214,7 @@ struct hn_nvs_subch_req {
 	uint32_t	nvs_op;		/* HN_NVS_SUBCH_OP_ */
 	uint32_t	nvs_nsubch;
 	uint8_t		nvs_rsvd[20];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_subch_req) >= HN_NVS_REQSIZE_MIN);
 
@@ -226,6 +238,7 @@ struct hn_nvs_rndis {
 	uint32_t	nvs_chim_idx;
 	uint32_t	nvs_chim_sz;
 	uint8_t		nvs_rsvd[16];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_rndis) >= HN_NVS_REQSIZE_MIN);
 
@@ -233,6 +246,7 @@ struct hn_nvs_rndis_ack {
 	uint32_t	nvs_type;	/* HN_NVS_TYPE_RNDIS_ACK */
 	uint32_t	nvs_status;	/* HN_NVS_STATUS_ */
 	uint8_t		nvs_rsvd[24];
+	uint8_t		nvs_msg_pad[8];
 } __packed;
 CTASSERT(sizeof(struct hn_nvs_rndis_ack) >= HN_NVS_REQSIZE_MIN);
 
diff --git a/sys/dev/hyperv/netvsc/if_hnvar.h b/sys/dev/hyperv/netvsc/if_hnvar.h
index c0e17c9643e0..27d93db5395e 100644
--- a/sys/dev/hyperv/netvsc/if_hnvar.h
+++ b/sys/dev/hyperv/netvsc/if_hnvar.h
@@ -33,7 +33,7 @@
 
 #define HN_CHIM_SIZE			(15 * 1024 * 1024)
 
-#define HN_RXBUF_SIZE			(16 * 1024 * 1024)
+#define HN_RXBUF_SIZE			(31 * 1024 * 1024)
 #define HN_RXBUF_SIZE_COMPAT		(15 * 1024 * 1024)
 
 #define HN_MTU_MAX			(65535 - ETHER_ADDR_LEN)
@@ -56,6 +56,20 @@ struct buf_ring;
 #endif
 struct hn_tx_ring;
 
+#define	HN_NVS_RSC_MAX		562	/* Max RSC frags in one vmbus packet */
+
+struct hn_rx_rsc {
+	const uint32_t		*vlan_info;
+	const uint32_t		*csum_info;
+	const uint32_t		*hash_info;
+	const uint32_t		*hash_value;
+	uint32_t		cnt;		/* fragment count */
+	uint32_t		pktlen;		/* full packet length */
+	uint8_t			is_last;	/* last fragment */
+	const void		*frag_data[HN_NVS_RSC_MAX];
+	uint32_t		frag_len[HN_NVS_RSC_MAX];
+};
+
 struct hn_rx_ring {
 	struct ifnet	*hn_ifp;
 	struct ifnet	*hn_rxvf_ifp;	/* SR-IOV VF for RX */
@@ -66,6 +80,7 @@ struct hn_rx_ring {
 	uint32_t	hn_mbuf_hash;	/* NDIS_HASH_ */
 	uint8_t		*hn_rxbuf;	/* shadow sc->hn_rxbuf */
 	int		hn_rx_idx;
+	struct hn_rx_rsc rsc;
 
 	/* Trust csum verification on host side */
 	int		hn_trust_hcsum;	/* HN_TRUST_HCSUM_ */
@@ -80,6 +95,8 @@ struct hn_rx_ring {
 	u_long		hn_pkts;
 	u_long		hn_rss_pkts;
 	u_long		hn_ack_failed;
+	u_long		hn_rsc_pkts;
+	u_long		hn_rsc_drop;
 
 	/* Rarely used stuffs */
 	struct sysctl_oid *hn_rx_sysctl_tree;
diff --git a/sys/dev/hyperv/netvsc/ndis.h b/sys/dev/hyperv/netvsc/ndis.h
index 32b6aa307452..c69da7807a63 100644
--- a/sys/dev/hyperv/netvsc/ndis.h
+++ b/sys/dev/hyperv/netvsc/ndis.h
@@ -115,8 +115,8 @@ struct ndis_offload_params {
 	/* NDIS >= 6.30 */
 	uint8_t			ndis_rsc_ip4;	/* NDIS_OFFLOAD_RSC_ */
 	uint8_t			ndis_rsc_ip6;	/* NDIS_OFFLOAD_RSC_ */
-	uint32_t		ndis_encap;	/* NDIS_OFFLOAD_SET_ */
-	uint32_t		ndis_encap_types;/* NDIS_ENCAP_TYPE_ */
+	uint8_t			ndis_encap;	/* NDIS_OFFLOAD_SET_ */
+	uint8_t			ndis_encap_types;/* NDIS_ENCAP_TYPE_ */
 };
 
 #define	NDIS_OFFLOAD_PARAMS_SIZE	sizeof(struct ndis_offload_params)
@@ -305,17 +305,17 @@ struct ndis_lsov2_offload {
 
 struct ndis_ipsecv2_offload {
 	uint32_t			ndis_encap;	/*NDIS_OFFLOAD_ENCAP_*/
-	uint16_t			ndis_ip6;
-	uint16_t			ndis_ip4opt;
-	uint16_t			ndis_ip6ext;
-	uint16_t			ndis_ah;
-	uint16_t			ndis_esp;
-	uint16_t			ndis_ah_esp;
-	uint16_t			ndis_xport;
-	uint16_t			ndis_tun;
-	uint16_t			ndis_xport_tun;
-	uint16_t			ndis_lso;
-	uint16_t			ndis_extseq;
+	uint8_t			ndis_ip6;
+	uint8_t			ndis_ip4opt;
+	uint8_t			ndis_ip6ext;
+	uint8_t			ndis_ah;
+	uint8_t			ndis_esp;
+	uint8_t			ndis_ah_esp;
+	uint8_t			ndis_xport;
+	uint8_t			ndis_tun;
+	uint8_t			ndis_xport_tun;
+	uint8_t			ndis_lso;
+	uint8_t			ndis_extseq;
 	uint32_t			ndis_udp_esp;
 	uint32_t			ndis_auth;
 	uint32_t			ndis_crypto;
@@ -323,8 +323,8 @@ struct ndis_ipsecv2_offload {
 };
 
 struct ndis_rsc_offload {
-	uint16_t			ndis_ip4;
-	uint16_t			ndis_ip6;
+	uint8_t			ndis_ip4;
+	uint8_t			ndis_ip6;
 };
 
 struct ndis_encap_offload {
@@ -419,5 +419,4 @@ struct ndis_offload {
 
 #define	NDIS_TXCSUM_INFO_MKUDPCS(thoff)				\
 	NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_UDPCS)
-
 #endif	/* !_NET_NDIS_H_ */
diff --git a/sys/net/rndis.h b/sys/net/rndis.h
index 5d45469a4b52..56350dbb7824 100644
--- a/sys/net/rndis.h
+++ b/sys/net/rndis.h
@@ -142,7 +142,8 @@ struct rndis_packet_msg {
 /* Per-packet-info for RNDIS data message */
 struct rndis_pktinfo {
 	uint32_t rm_size;
-	uint32_t rm_type;		/* NDIS_PKTINFO_TYPE_ */
+	uint32_t rm_type:31;		/* NDIS_PKTINFO_TYPE_ */
+	uint32_t rm_internal:1;		/* Indicate if internal type */
 	uint32_t rm_pktinfooffset;
 	uint8_t rm_data[];
 };
@@ -165,6 +166,10 @@ struct rndis_pktinfo {
 #define	NDIS_PKTINFO_TYPE_CACHE_NBLIST	10
 #define	NDIS_PKTINFO_TYPE_PKT_PAD	11
 
+/* Per-packet-info internal type */
+#define	NDIS_PKTINFO_IT_PKTINFO_ID	1
+/* Add more internal type here */
+
 /*
  * RNDIS control messages
  */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202103290912.12T9CDTY052724>