From owner-dev-commits-src-all@freebsd.org Mon Mar 29 10:20:52 2021 Return-Path: Delivered-To: dev-commits-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 1688E5C3CD2; Mon, 29 Mar 2021 10:20:52 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4F87rh090kz3D7M; Mon, 29 Mar 2021 10:20:52 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id EC8EC11758; Mon, 29 Mar 2021 10:20:51 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 12TAKpBB046106; Mon, 29 Mar 2021 10:20:51 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 12TAKpGs046105; Mon, 29 Mar 2021 10:20:51 GMT (envelope-from git) Date: Mon, 29 Mar 2021 10:20:51 GMT Message-Id: <202103291020.12TAKpGs046105@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: Wei Hu Subject: git: cdc59163ff8e - stable/13 - Hyper-V: hn: Enable vSwitch RSC support in hn netvsc driver MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit 
X-Git-Committer: whu X-Git-Repository: src X-Git-Refname: refs/heads/stable/13 X-Git-Reftype: branch X-Git-Commit: cdc59163ff8e8f2a98ef8ca02773d1d2047e3aae Auto-Submitted: auto-generated X-BeenThere: dev-commits-src-all@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Commit messages for all branches of the src repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 29 Mar 2021 10:20:52 -0000 The branch stable/13 has been updated by whu: URL: https://cgit.FreeBSD.org/src/commit/?id=cdc59163ff8e8f2a98ef8ca02773d1d2047e3aae commit cdc59163ff8e8f2a98ef8ca02773d1d2047e3aae Author: Wei Hu AuthorDate: 2021-03-12 04:35:16 +0000 Commit: Wei Hu CommitDate: 2021-03-29 10:20:03 +0000 Hyper-V: hn: Enable vSwitch RSC support in hn netvsc driver Receive Segment Coalescing (RSC) in the vSwitch is a feature available in Windows Server 2019 hosts and later. It reduces the per-packet processing overhead by coalescing multiple TCP segments when possible. This happens mostly when TCP traffic is between different guests on the same host. This patch adds netvsc driver support for this feature. The patch also updates NVS version to 6.1 as needed for RSC enablement. 
MFC after: 2 weeks Sponsored by: Microsoft Differential Revision: https://reviews.freebsd.org/D29075 (cherry picked from commit a491581f3f8df07cdff0236bd556895205929af4) --- sys/dev/hyperv/netvsc/hn_nvs.c | 5 + sys/dev/hyperv/netvsc/hn_rndis.c | 25 ++++ sys/dev/hyperv/netvsc/if_hn.c | 258 ++++++++++++++++++++++++++++----------- sys/dev/hyperv/netvsc/if_hnreg.h | 14 +++ sys/dev/hyperv/netvsc/if_hnvar.h | 19 ++- sys/dev/hyperv/netvsc/ndis.h | 31 +++-- sys/net/rndis.h | 7 +- 7 files changed, 269 insertions(+), 90 deletions(-) diff --git a/sys/dev/hyperv/netvsc/hn_nvs.c b/sys/dev/hyperv/netvsc/hn_nvs.c index 73a112c4e5e1..4dbc28996617 100644 --- a/sys/dev/hyperv/netvsc/hn_nvs.c +++ b/sys/dev/hyperv/netvsc/hn_nvs.c @@ -80,6 +80,8 @@ struct hn_nvs_sendctx hn_nvs_sendctx_none = HN_NVS_SENDCTX_INITIALIZER(hn_nvs_sent_none, NULL); static const uint32_t hn_nvs_version[] = { + HN_NVS_VERSION_61, + HN_NVS_VERSION_6, HN_NVS_VERSION_5, HN_NVS_VERSION_4, HN_NVS_VERSION_2, @@ -508,6 +510,9 @@ hn_nvs_conf_ndis(struct hn_softc *sc, int mtu) conf.nvs_caps = HN_NVS_NDIS_CONF_VLAN; if (sc->hn_nvs_ver >= HN_NVS_VERSION_5) conf.nvs_caps |= HN_NVS_NDIS_CONF_SRIOV; + if (sc->hn_nvs_ver >= HN_NVS_VERSION_61) + conf.nvs_caps |= HN_NVS_NDIS_CONF_RSC; + /* NOTE: No response. 
*/ error = hn_nvs_req_send(sc, &conf, sizeof(conf)); diff --git a/sys/dev/hyperv/netvsc/hn_rndis.c b/sys/dev/hyperv/netvsc/hn_rndis.c index b9bf683fe811..794a82cf3957 100644 --- a/sys/dev/hyperv/netvsc/hn_rndis.c +++ b/sys/dev/hyperv/netvsc/hn_rndis.c @@ -723,6 +723,17 @@ hn_rndis_conf_offload(struct hn_softc *sc, int mtu) params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_RX; } + /* RSC offload */ + if (hwcaps.ndis_hdr.ndis_rev >= NDIS_OFFLOAD_PARAMS_REV_3) { + if (hwcaps.ndis_rsc.ndis_ip4 && hwcaps.ndis_rsc.ndis_ip6) { + params.ndis_rsc_ip4 = NDIS_OFFLOAD_RSC_ON; + params.ndis_rsc_ip6 = NDIS_OFFLOAD_RSC_ON; + } else { + params.ndis_rsc_ip4 = NDIS_OFFLOAD_RSC_OFF; + params.ndis_rsc_ip6 = NDIS_OFFLOAD_RSC_OFF; + } + } + if (bootverbose) { if_printf(sc->hn_ifp, "offload csum: " "ip4 %u, tcp4 %u, udp4 %u, tcp6 %u, udp6 %u\n", @@ -734,6 +745,10 @@ hn_rndis_conf_offload(struct hn_softc *sc, int mtu) if_printf(sc->hn_ifp, "offload lsov2: ip4 %u, ip6 %u\n", params.ndis_lsov2_ip4, params.ndis_lsov2_ip6); + if (hwcaps.ndis_hdr.ndis_rev >= NDIS_OFFLOAD_PARAMS_REV_3) + if_printf(sc->hn_ifp, "offload rsc: ip4 %u, ip6 %u\n", + params.ndis_rsc_ip4, + params.ndis_rsc_ip6); } error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, &params, paramsz); @@ -969,6 +984,11 @@ hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps) if_printf(sc->hn_ifp, "invalid NDIS objsize %u\n", caps->ndis_hdr.ndis_size); return (EINVAL); + } else if (caps->ndis_hdr.ndis_rev >= NDIS_OFFLOAD_REV_3 && + caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE) { + if_printf(sc->hn_ifp, "invalid NDIS rev3 objsize %u\n", + caps->ndis_hdr.ndis_size); + return (EINVAL); } if (bootverbose) { @@ -1001,6 +1021,11 @@ hn_rndis_query_hwcaps(struct hn_softc *sc, struct ndis_offload *caps) caps->ndis_lsov2.ndis_ip6_minsg, caps->ndis_lsov2.ndis_ip6_encap, caps->ndis_lsov2.ndis_ip6_opts); + if (caps->ndis_hdr.ndis_rev >= NDIS_OFFLOAD_REV_3) + if_printf(sc->hn_ifp, "hwcaps rsc: " + "ip4 %u ip6 %u\n", + caps->ndis_rsc.ndis_ip4, + 
caps->ndis_rsc.ndis_ip6); } return (0); } diff --git a/sys/dev/hyperv/netvsc/if_hn.c b/sys/dev/hyperv/netvsc/if_hn.c index 9243ff226f5b..f4bdbb1ee788 100644 --- a/sys/dev/hyperv/netvsc/if_hn.c +++ b/sys/dev/hyperv/netvsc/if_hn.c @@ -223,11 +223,25 @@ struct hn_txdesc { #define HN_TXD_FLAG_DMAMAP 0x0002 #define HN_TXD_FLAG_ONAGG 0x0004 +#define HN_NDIS_PKTINFO_SUBALLOC 0x01 +#define HN_NDIS_PKTINFO_1ST_FRAG 0x02 +#define HN_NDIS_PKTINFO_LAST_FRAG 0x04 + +struct packet_info_id { + uint8_t ver; + uint8_t flag; + uint16_t pkt_id; +}; + +#define NDIS_PKTINFOID_SZ sizeof(struct packet_info_id) + + struct hn_rxinfo { - uint32_t vlan_info; - uint32_t csum_info; - uint32_t hash_info; - uint32_t hash_value; + const uint32_t *vlan_info; + const uint32_t *csum_info; + const uint32_t *hash_info; + const uint32_t *hash_value; + const struct packet_info_id *pktinfo_id; }; struct hn_rxvf_setarg { @@ -239,15 +253,13 @@ struct hn_rxvf_setarg { #define HN_RXINFO_CSUM 0x0002 #define HN_RXINFO_HASHINF 0x0004 #define HN_RXINFO_HASHVAL 0x0008 +#define HN_RXINFO_PKTINFO_ID 0x0010 #define HN_RXINFO_ALL \ (HN_RXINFO_VLAN | \ HN_RXINFO_CSUM | \ HN_RXINFO_HASHINF | \ - HN_RXINFO_HASHVAL) - -#define HN_NDIS_VLAN_INFO_INVALID 0xffffffff -#define HN_NDIS_RXCSUM_INFO_INVALID 0 -#define HN_NDIS_HASH_INFO_INVALID 0 + HN_RXINFO_HASHVAL | \ + HN_RXINFO_PKTINFO_ID) static int hn_probe(device_t); static int hn_attach(device_t); @@ -396,8 +408,7 @@ static int hn_rxfilter_config(struct hn_softc *); static int hn_rss_reconfig(struct hn_softc *); static void hn_rss_ind_fixup(struct hn_softc *); static void hn_rss_mbuf_hash(struct hn_softc *, uint32_t); -static int hn_rxpkt(struct hn_rx_ring *, const void *, - int, const struct hn_rxinfo *); +static int hn_rxpkt(struct hn_rx_ring *); static uint32_t hn_rss_type_fromndis(uint32_t); static uint32_t hn_rss_type_tondis(uint32_t); @@ -3353,9 +3364,10 @@ again: * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. 
* - * Return 1 if able to complete the job; otherwise 0. + * Return the last mbuf in the chain or NULL if failed to + * allocate new mbuf. */ -static int +static struct mbuf * hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; @@ -3383,7 +3395,7 @@ hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) */ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) - break; + return NULL; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; @@ -3391,10 +3403,8 @@ hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) m->m_next = n; m = n; } - if (m0->m_flags & M_PKTHDR) - m0->m_pkthdr.len += len - remainder; - return (remainder == 0); + return m; } #if defined(INET) || defined(INET6) @@ -3412,14 +3422,14 @@ hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) #endif static int -hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, - const struct hn_rxinfo *info) +hn_rxpkt(struct hn_rx_ring *rxr) { struct ifnet *ifp, *hn_ifp = rxr->hn_ifp; - struct mbuf *m_new; + struct mbuf *m_new, *n; int size, do_lro = 0, do_csum = 1, is_vf = 0; int hash_type = M_HASHTYPE_NONE; int l3proto = ETHERTYPE_MAX, l4proto = IPPROTO_DONE; + int i; ifp = hn_ifp; if (rxr->hn_rxvf_ifp != NULL) { @@ -3446,20 +3456,20 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, return (0); } - if (__predict_false(dlen < ETHER_HDR_LEN)) { + if (__predict_false(rxr->rsc.pktlen < ETHER_HDR_LEN)) { if_inc_counter(hn_ifp, IFCOUNTER_IERRORS, 1); return (0); } - if (dlen <= MHLEN) { + if (rxr->rsc.cnt == 1 && rxr->rsc.pktlen <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); return (0); } - memcpy(mtod(m_new, void *), data, dlen); - m_new->m_pkthdr.len = m_new->m_len = dlen; - rxr->hn_small_pkts++; + memcpy(mtod(m_new, void *), rxr->rsc.frag_data[0], + rxr->rsc.frag_len[0]); + m_new->m_pkthdr.len = m_new->m_len = rxr->rsc.frag_len[0]; } else { /* * Get an mbuf with a 
cluster. For packets 2K or less, @@ -3468,7 +3478,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; - if (dlen > MCLBYTES) { + if (rxr->rsc.pktlen > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } @@ -3479,29 +3489,42 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, return (0); } - hv_m_append(m_new, dlen, data); + n = m_new; + for (i = 0; i < rxr->rsc.cnt; i++) { + n = hv_m_append(n, rxr->rsc.frag_len[i], + rxr->rsc.frag_data[i]); + if (n == NULL) { + if_inc_counter(hn_ifp, IFCOUNTER_IQDROPS, 1); + return (0); + } else { + m_new->m_pkthdr.len += rxr->rsc.frag_len[i]; + } + } } + if (rxr->rsc.pktlen <= MHLEN) + rxr->hn_small_pkts++; + m_new->m_pkthdr.rcvif = ifp; if (__predict_false((hn_ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ - if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { + if (rxr->rsc.csum_info != NULL) { /* IP csum offload */ - if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { + if ((*(rxr->rsc.csum_info) & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ - if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | + if ((*(rxr->rsc.csum_info) & (NDIS_RXCSUM_INFO_UDPCS_OK | NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; - if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) + if (*(rxr->rsc.csum_info) & NDIS_RXCSUM_INFO_TCPCS_OK) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; @@ -3514,7 +3537,7 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, * the do_lro setting here is actually _not_ accurate. We * depend on the RSS hash type check to reset do_lro. 
*/ - if ((info->csum_info & + if ((*(rxr->rsc.csum_info) & (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) do_lro = 1; @@ -3551,11 +3574,11 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, } } - if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { + if (rxr->rsc.vlan_info != NULL) { m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( - NDIS_VLAN_INFO_ID(info->vlan_info), - NDIS_VLAN_INFO_PRI(info->vlan_info), - NDIS_VLAN_INFO_CFI(info->vlan_info)); + NDIS_VLAN_INFO_ID(*(rxr->rsc.vlan_info)), + NDIS_VLAN_INFO_PRI(*(rxr->rsc.vlan_info)), + NDIS_VLAN_INFO_CFI(*(rxr->rsc.vlan_info))); m_new->m_flags |= M_VLANTAG; } @@ -3581,14 +3604,14 @@ hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen, * matter here), do _not_ mess with unsupported hash types or * functions. */ - if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { + if (rxr->rsc.hash_info != NULL) { rxr->hn_rss_pkts++; - m_new->m_pkthdr.flowid = info->hash_value; + m_new->m_pkthdr.flowid = *(rxr->rsc.hash_value); if (!is_vf) hash_type = M_HASHTYPE_OPAQUE_HASH; - if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == + if ((*(rxr->rsc.hash_info) & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { - uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK & + uint32_t type = (*(rxr->rsc.hash_info) & NDIS_HASH_TYPE_MASK & rxr->hn_mbuf_hash); /* @@ -5033,6 +5056,16 @@ hn_create_rx_data(struct hn_softc *sc, int ring_cnt) OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); + SYSCTL_ADD_ULONG(ctx, + SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), + OID_AUTO, "rsc_pkts", CTLFLAG_RW, + &rxr->hn_rsc_pkts, + "# of RSC packets received"); + SYSCTL_ADD_ULONG(ctx, + SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), + OID_AUTO, "rsc_drop", CTLFLAG_RW, + &rxr->hn_rsc_drop, + "# of RSC fragments dropped"); SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "pktbuf_len", CTLFLAG_RD, @@ -7057,37 +7090,56 @@ 
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info) dlen = pi->rm_size - pi->rm_pktinfooffset; data = pi->rm_data; - switch (pi->rm_type) { - case NDIS_PKTINFO_TYPE_VLAN: - if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) - return (EINVAL); - info->vlan_info = *((const uint32_t *)data); - mask |= HN_RXINFO_VLAN; - break; + if (pi->rm_internal == 1) { + switch (pi->rm_type) { + case NDIS_PKTINFO_IT_PKTINFO_ID: + if (__predict_false(dlen < NDIS_PKTINFOID_SZ)) + return (EINVAL); + info->pktinfo_id = + (const struct packet_info_id *)data; + mask |= HN_RXINFO_PKTINFO_ID; + break; - case NDIS_PKTINFO_TYPE_CSUM: - if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) - return (EINVAL); - info->csum_info = *((const uint32_t *)data); - mask |= HN_RXINFO_CSUM; - break; + default: + goto next; + } + } else { + switch (pi->rm_type) { + case NDIS_PKTINFO_TYPE_VLAN: + if (__predict_false(dlen + < NDIS_VLAN_INFO_SIZE)) + return (EINVAL); + info->vlan_info = (const uint32_t *)data; + mask |= HN_RXINFO_VLAN; + break; - case HN_NDIS_PKTINFO_TYPE_HASHVAL: - if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) - return (EINVAL); - info->hash_value = *((const uint32_t *)data); - mask |= HN_RXINFO_HASHVAL; - break; + case NDIS_PKTINFO_TYPE_CSUM: + if (__predict_false(dlen + < NDIS_RXCSUM_INFO_SIZE)) + return (EINVAL); + info->csum_info = (const uint32_t *)data; + mask |= HN_RXINFO_CSUM; + break; - case HN_NDIS_PKTINFO_TYPE_HASHINF: - if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) - return (EINVAL); - info->hash_info = *((const uint32_t *)data); - mask |= HN_RXINFO_HASHINF; - break; + case HN_NDIS_PKTINFO_TYPE_HASHVAL: + if (__predict_false(dlen + < HN_NDIS_HASH_VALUE_SIZE)) + return (EINVAL); + info->hash_value = (const uint32_t *)data; + mask |= HN_RXINFO_HASHVAL; + break; - default: - goto next; + case HN_NDIS_PKTINFO_TYPE_HASHINF: + if (__predict_false(dlen + < HN_NDIS_HASH_INFO_SIZE)) + return (EINVAL); + info->hash_info = (const uint32_t *)data; + mask 
|= HN_RXINFO_HASHINF; + break; + + default: + goto next; + } } if (mask == HN_RXINFO_ALL) { @@ -7104,7 +7156,7 @@ next: * - If there is no hash value, invalidate the hash info. */ if ((mask & HN_RXINFO_HASHVAL) == 0) - info->hash_info = HN_NDIS_HASH_INFO_INVALID; + info->hash_info = NULL; return (0); } @@ -7122,12 +7174,34 @@ hn_rndis_check_overlap(int off, int len, int check_off, int check_len) return (true); } +static __inline void +hn_rsc_add_data(struct hn_rx_ring *rxr, const void *data, + uint32_t len, struct hn_rxinfo *info) +{ + uint32_t cnt = rxr->rsc.cnt; + + if (cnt) { + rxr->rsc.pktlen += len; + } else { + rxr->rsc.vlan_info = info->vlan_info; + rxr->rsc.csum_info = info->csum_info; + rxr->rsc.hash_info = info->hash_info; + rxr->rsc.hash_value = info->hash_value; + rxr->rsc.pktlen = len; + } + + rxr->rsc.frag_data[cnt] = data; + rxr->rsc.frag_len[cnt] = len; + rxr->rsc.cnt++; +} + static void hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_packet_msg *pkt; struct hn_rxinfo info; int data_off, pktinfo_off, data_len, pktinfo_len; + bool rsc_more= false; /* * Check length. @@ -7235,9 +7309,11 @@ hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) /* * Check per-packet-info coverage and find useful per-packet-info. 
*/ - info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; - info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; - info.hash_info = HN_NDIS_HASH_INFO_INVALID; + info.vlan_info = NULL; + info.csum_info = NULL; + info.hash_info = NULL; + info.pktinfo_id = NULL; + if (__predict_true(pktinfo_len != 0)) { bool overlap; int error; @@ -7281,7 +7357,43 @@ hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen) pkt->rm_len, data_off, data_len); return; } - hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); + + /* Identify RSC fragments, drop invalid packets */ + if ((info.pktinfo_id != NULL) && + (info.pktinfo_id->flag & HN_NDIS_PKTINFO_SUBALLOC)) { + if (info.pktinfo_id->flag & HN_NDIS_PKTINFO_1ST_FRAG) { + rxr->rsc.cnt = 0; + rxr->hn_rsc_pkts++; + } else if (rxr->rsc.cnt == 0) + goto drop; + + rsc_more = true; + + if (info.pktinfo_id->flag & HN_NDIS_PKTINFO_LAST_FRAG) + rsc_more = false; + + if (rsc_more && rxr->rsc.is_last) + goto drop; + } else { + rxr->rsc.cnt = 0; + } + + if (__predict_false(rxr->rsc.cnt >= HN_NVS_RSC_MAX)) + goto drop; + + /* Store data in per rx ring structure */ + hn_rsc_add_data(rxr,((const uint8_t *)pkt) + data_off, + data_len, &info); + + if (rsc_more) + return; + + hn_rxpkt(rxr); + rxr->rsc.cnt = 0; + return; +drop: + rxr->hn_rsc_drop++; + return; } static __inline void @@ -7394,6 +7506,8 @@ hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan, "ofs %d, len %d\n", i, ofs, len); continue; } + + rxr->rsc.is_last = (i == (count - 1)); hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len); } diff --git a/sys/dev/hyperv/netvsc/if_hnreg.h b/sys/dev/hyperv/netvsc/if_hnreg.h index 154d69264679..54db556cc56d 100644 --- a/sys/dev/hyperv/netvsc/if_hnreg.h +++ b/sys/dev/hyperv/netvsc/if_hnreg.h @@ -48,6 +48,8 @@ #define HN_NVS_VERSION_2 0x30002 #define HN_NVS_VERSION_4 0x40000 #define HN_NVS_VERSION_5 0x50000 +#define HN_NVS_VERSION_6 0x60000 +#define HN_NVS_VERSION_61 0x60001 #define HN_NVS_RXBUF_SIG 0xcafe #define HN_NVS_CHIM_SIG 
0xface @@ -101,6 +103,7 @@ struct hn_nvs_init { uint32_t nvs_ver_min; uint32_t nvs_ver_max; uint8_t nvs_rsvd[20]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_init) >= HN_NVS_REQSIZE_MIN); @@ -118,11 +121,13 @@ struct hn_nvs_ndis_conf { uint32_t nvs_rsvd; uint64_t nvs_caps; /* HN_NVS_NDIS_CONF_ */ uint8_t nvs_rsvd1[12]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_ndis_conf) >= HN_NVS_REQSIZE_MIN); #define HN_NVS_NDIS_CONF_SRIOV 0x0004 #define HN_NVS_NDIS_CONF_VLAN 0x0008 +#define HN_NVS_NDIS_CONF_RSC 0x0080 /* No response */ struct hn_nvs_ndis_init { @@ -130,6 +135,7 @@ struct hn_nvs_ndis_init { uint32_t nvs_ndis_major; /* NDIS_VERSION_MAJOR_ */ uint32_t nvs_ndis_minor; /* NDIS_VERSION_MINOR_ */ uint8_t nvs_rsvd[20]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_ndis_init) >= HN_NVS_REQSIZE_MIN); @@ -141,6 +147,7 @@ struct hn_nvs_datapath { uint32_t nvs_type; /* HN_NVS_TYPE_SET_DATAPATH */ uint32_t nvs_active_path;/* HN_NVS_DATAPATH_* */ uint32_t nvs_rsvd[6]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_datapath) >= HN_NVS_REQSIZE_MIN); @@ -149,6 +156,7 @@ struct hn_nvs_rxbuf_conn { uint32_t nvs_gpadl; /* RXBUF vmbus GPADL */ uint16_t nvs_sig; /* HN_NVS_RXBUF_SIG */ uint8_t nvs_rsvd[22]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_rxbuf_conn) >= HN_NVS_REQSIZE_MIN); @@ -171,6 +179,7 @@ struct hn_nvs_rxbuf_disconn { uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_DISCONN */ uint16_t nvs_sig; /* HN_NVS_RXBUF_SIG */ uint8_t nvs_rsvd[26]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_rxbuf_disconn) >= HN_NVS_REQSIZE_MIN); @@ -179,6 +188,7 @@ struct hn_nvs_chim_conn { uint32_t nvs_gpadl; /* chimney buf vmbus GPADL */ uint16_t nvs_sig; /* NDIS_NVS_CHIM_SIG */ uint8_t nvs_rsvd[22]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_chim_conn) >= HN_NVS_REQSIZE_MIN); @@ -193,6 +203,7 @@ struct hn_nvs_chim_disconn { uint32_t 
nvs_type; /* HN_NVS_TYPE_CHIM_DISCONN */ uint16_t nvs_sig; /* HN_NVS_CHIM_SIG */ uint8_t nvs_rsvd[26]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_chim_disconn) >= HN_NVS_REQSIZE_MIN); @@ -203,6 +214,7 @@ struct hn_nvs_subch_req { uint32_t nvs_op; /* HN_NVS_SUBCH_OP_ */ uint32_t nvs_nsubch; uint8_t nvs_rsvd[20]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_subch_req) >= HN_NVS_REQSIZE_MIN); @@ -226,6 +238,7 @@ struct hn_nvs_rndis { uint32_t nvs_chim_idx; uint32_t nvs_chim_sz; uint8_t nvs_rsvd[16]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_rndis) >= HN_NVS_REQSIZE_MIN); @@ -233,6 +246,7 @@ struct hn_nvs_rndis_ack { uint32_t nvs_type; /* HN_NVS_TYPE_RNDIS_ACK */ uint32_t nvs_status; /* HN_NVS_STATUS_ */ uint8_t nvs_rsvd[24]; + uint8_t nvs_msg_pad[8]; } __packed; CTASSERT(sizeof(struct hn_nvs_rndis_ack) >= HN_NVS_REQSIZE_MIN); diff --git a/sys/dev/hyperv/netvsc/if_hnvar.h b/sys/dev/hyperv/netvsc/if_hnvar.h index c0e17c9643e0..27d93db5395e 100644 --- a/sys/dev/hyperv/netvsc/if_hnvar.h +++ b/sys/dev/hyperv/netvsc/if_hnvar.h @@ -33,7 +33,7 @@ #define HN_CHIM_SIZE (15 * 1024 * 1024) -#define HN_RXBUF_SIZE (16 * 1024 * 1024) +#define HN_RXBUF_SIZE (31 * 1024 * 1024) #define HN_RXBUF_SIZE_COMPAT (15 * 1024 * 1024) #define HN_MTU_MAX (65535 - ETHER_ADDR_LEN) @@ -56,6 +56,20 @@ struct buf_ring; #endif struct hn_tx_ring; +#define HN_NVS_RSC_MAX 562 /* Max RSC frags in one vmbus packet */ + +struct hn_rx_rsc { + const uint32_t *vlan_info; + const uint32_t *csum_info; + const uint32_t *hash_info; + const uint32_t *hash_value; + uint32_t cnt; /* fragment count */ + uint32_t pktlen; /* full packet length */ + uint8_t is_last; /* last fragment */ + const void *frag_data[HN_NVS_RSC_MAX]; + uint32_t frag_len[HN_NVS_RSC_MAX]; +}; + struct hn_rx_ring { struct ifnet *hn_ifp; struct ifnet *hn_rxvf_ifp; /* SR-IOV VF for RX */ @@ -66,6 +80,7 @@ struct hn_rx_ring { uint32_t hn_mbuf_hash; /* NDIS_HASH_ */ uint8_t 
*hn_rxbuf; /* shadow sc->hn_rxbuf */ int hn_rx_idx; + struct hn_rx_rsc rsc; /* Trust csum verification on host side */ int hn_trust_hcsum; /* HN_TRUST_HCSUM_ */ @@ -80,6 +95,8 @@ struct hn_rx_ring { u_long hn_pkts; u_long hn_rss_pkts; u_long hn_ack_failed; + u_long hn_rsc_pkts; + u_long hn_rsc_drop; /* Rarely used stuffs */ struct sysctl_oid *hn_rx_sysctl_tree; diff --git a/sys/dev/hyperv/netvsc/ndis.h b/sys/dev/hyperv/netvsc/ndis.h index 32b6aa307452..c69da7807a63 100644 --- a/sys/dev/hyperv/netvsc/ndis.h +++ b/sys/dev/hyperv/netvsc/ndis.h @@ -115,8 +115,8 @@ struct ndis_offload_params { /* NDIS >= 6.30 */ uint8_t ndis_rsc_ip4; /* NDIS_OFFLOAD_RSC_ */ uint8_t ndis_rsc_ip6; /* NDIS_OFFLOAD_RSC_ */ - uint32_t ndis_encap; /* NDIS_OFFLOAD_SET_ */ - uint32_t ndis_encap_types;/* NDIS_ENCAP_TYPE_ */ + uint8_t ndis_encap; /* NDIS_OFFLOAD_SET_ */ + uint8_t ndis_encap_types;/* NDIS_ENCAP_TYPE_ */ }; #define NDIS_OFFLOAD_PARAMS_SIZE sizeof(struct ndis_offload_params) @@ -305,17 +305,17 @@ struct ndis_lsov2_offload { struct ndis_ipsecv2_offload { uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/ - uint16_t ndis_ip6; - uint16_t ndis_ip4opt; - uint16_t ndis_ip6ext; - uint16_t ndis_ah; - uint16_t ndis_esp; - uint16_t ndis_ah_esp; - uint16_t ndis_xport; - uint16_t ndis_tun; - uint16_t ndis_xport_tun; - uint16_t ndis_lso; - uint16_t ndis_extseq; + uint8_t ndis_ip6; + uint8_t ndis_ip4opt; + uint8_t ndis_ip6ext; + uint8_t ndis_ah; + uint8_t ndis_esp; + uint8_t ndis_ah_esp; + uint8_t ndis_xport; + uint8_t ndis_tun; + uint8_t ndis_xport_tun; + uint8_t ndis_lso; + uint8_t ndis_extseq; uint32_t ndis_udp_esp; uint32_t ndis_auth; uint32_t ndis_crypto; @@ -323,8 +323,8 @@ struct ndis_ipsecv2_offload { }; struct ndis_rsc_offload { - uint16_t ndis_ip4; - uint16_t ndis_ip6; + uint8_t ndis_ip4; + uint8_t ndis_ip6; }; struct ndis_encap_offload { @@ -419,5 +419,4 @@ struct ndis_offload { #define NDIS_TXCSUM_INFO_MKUDPCS(thoff) \ NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_UDPCS) - #endif /* 
!_NET_NDIS_H_ */ diff --git a/sys/net/rndis.h b/sys/net/rndis.h index 5d45469a4b52..56350dbb7824 100644 --- a/sys/net/rndis.h +++ b/sys/net/rndis.h @@ -142,7 +142,8 @@ struct rndis_packet_msg { /* Per-packet-info for RNDIS data message */ struct rndis_pktinfo { uint32_t rm_size; - uint32_t rm_type; /* NDIS_PKTINFO_TYPE_ */ + uint32_t rm_type:31; /* NDIS_PKTINFO_TYPE_ */ + uint32_t rm_internal:1; /* Indicate if internal type */ uint32_t rm_pktinfooffset; uint8_t rm_data[]; }; @@ -165,6 +166,10 @@ struct rndis_pktinfo { #define NDIS_PKTINFO_TYPE_CACHE_NBLIST 10 #define NDIS_PKTINFO_TYPE_PKT_PAD 11 +/* Per-packet-info internal type */ +#define NDIS_PKTINFO_IT_PKTINFO_ID 1 +/* Add more internal type here */ + /* * RNDIS control messages */