From owner-dev-commits-src-branches@freebsd.org Mon Jul 26 15:19:33 2021 Return-Path: Delivered-To: dev-commits-src-branches@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 4865366DAC3; Mon, 26 Jul 2021 15:19:33 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4GYNrP1MGwz3JmS; Mon, 26 Jul 2021 15:19:33 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id B09EC19628; Mon, 26 Jul 2021 15:19:32 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 16QFJWh5025505; Mon, 26 Jul 2021 15:19:32 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 16QFJWdU025504; Mon, 26 Jul 2021 15:19:32 GMT (envelope-from git) Date: Mon, 26 Jul 2021 15:19:32 GMT Message-Id: <202107261519.16QFJWdU025504@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: Konstantin Belousov Subject: git: 56fd9780ad57 - stable/13 - mlx5en: handle checksum and TSO offloading for VxLAN packets on TX MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 
Content-Transfer-Encoding: 8bit X-Git-Committer: kib X-Git-Repository: src X-Git-Refname: refs/heads/stable/13 X-Git-Reftype: branch X-Git-Commit: 56fd9780ad57d06133e19bec13b0e54aae36b809 Auto-Submitted: auto-generated X-BeenThere: dev-commits-src-branches@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Commits to the stable branches of the FreeBSD src repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 26 Jul 2021 15:19:33 -0000 The branch stable/13 has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=56fd9780ad57d06133e19bec13b0e54aae36b809 commit 56fd9780ad57d06133e19bec13b0e54aae36b809 Author: Konstantin Belousov AuthorDate: 2021-04-06 03:45:20 +0000 Commit: Konstantin Belousov CommitDate: 2021-07-26 13:51:10 +0000 mlx5en: handle checksum and TSO offloading for VxLAN packets on TX (cherry picked from commit 7c9febf9f11bec467ef79c6ff19d695ace7edb6b) --- sys/dev/mlx5/mlx5_en/mlx5_en_tx.c | 289 +++++++++++++++++++++++++++++++++++++- 1 file changed, 287 insertions(+), 2 deletions(-) diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c index 1ba47e44ae96..6bc0a91ca8ef 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c @@ -332,6 +332,221 @@ failure: return (0); } +/* + * Locate a pointer inside an mbuf chain. Returns NULL upon failure. + */ +static inline void * +mlx5e_parse_mbuf_chain(const struct mbuf **mb, int *poffset, int eth_hdr_len, + int min_len) +{ + if (unlikely(mb[0]->m_len == eth_hdr_len)) { + poffset[0] = eth_hdr_len; + if (unlikely((mb[0] = mb[0]->m_next) == NULL)) + return (NULL); + } + if (unlikely(mb[0]->m_len < eth_hdr_len - poffset[0] + min_len)) + return (NULL); + return (mb[0]->m_data + eth_hdr_len - poffset[0]); +} + +/* + * This function parses IPv4 and IPv6 packets looking for UDP, VXLAN + * and TCP headers. 
+ * + * The return value indicates the number of bytes from the beginning + * of the packet until the first byte after the last parsed header. If this + * function returns zero, the parsing failed. + */ +static int +mlx5e_get_vxlan_header_size(const struct mbuf *mb, struct mlx5e_tx_wqe *wqe, + uint8_t cs_mask, uint8_t opcode) +{ + const struct ether_vlan_header *eh; + struct ip *ip4; + struct ip6_hdr *ip6; + struct tcphdr *th; + struct udphdr *udp; + bool has_outer_vlan_tag; + uint16_t eth_type; + uint8_t ip_type; + int pkt_hdr_len; + int eth_hdr_len; + int tcp_hlen; + int ip_hlen; + int offset; + + pkt_hdr_len = mb->m_pkthdr.len; + has_outer_vlan_tag = (mb->m_flags & M_VLANTAG) != 0; + offset = 0; + + eh = mtod(mb, const struct ether_vlan_header *); + if (unlikely(mb->m_len < ETHER_HDR_LEN)) + return (0); + + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + if (unlikely(mb->m_len < ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)) + return (0); + eth_type = eh->evl_proto; + eth_hdr_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + eth_type = eh->evl_encap_proto; + eth_hdr_len = ETHER_HDR_LEN; + } + + switch (eth_type) { + case htons(ETHERTYPE_IP): + ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + sizeof(*ip4)); + if (unlikely(ip4 == NULL)) + return (0); + ip_type = ip4->ip_p; + if (unlikely(ip_type != IPPROTO_UDP)) + return (0); + wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2; + wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + ip_hlen = ip4->ip_hl << 2; + eth_hdr_len += ip_hlen; + udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + sizeof(*udp)); + if (unlikely(udp == NULL)) + return (0); + wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2; + wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE; + eth_hdr_len += sizeof(*udp); + break; + case htons(ETHERTYPE_IPV6): + ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + sizeof(*ip6)); + if (unlikely(ip6 == NULL)) + return (0); + ip_type = ip6->ip6_nxt; + if (unlikely(ip_type != 
IPPROTO_UDP)) + return (0); + wqe->eth.swp_outer_l3_offset = eth_hdr_len / 2; + wqe->eth.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + eth_hdr_len += sizeof(*ip6); + udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + sizeof(*udp)); + if (unlikely(udp == NULL)) + return (0); + wqe->eth.swp_outer_l4_offset = eth_hdr_len / 2; + wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_TYPE | + MLX5_ETH_WQE_SWP_OUTER_L3_TYPE; + eth_hdr_len += sizeof(*udp); + break; + default: + return (0); + } + + /* + * If the hardware is not computing inner IP checksum, then + * skip inlining the VXLAN header and the inner headers: + */ + if (unlikely((cs_mask & MLX5_ETH_WQE_L3_INNER_CSUM) == 0)) + goto done; + if (unlikely(mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + 8) == NULL)) + return (0); + eth_hdr_len += 8; + + /* Check for ethernet header again. */ + eh = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, ETHER_HDR_LEN); + if (unlikely(eh == NULL)) + return (0); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + if (unlikely(mb->m_len < eth_hdr_len - offset + ETHER_HDR_LEN + + ETHER_VLAN_ENCAP_LEN)) + return (0); + eth_type = eh->evl_proto; + eth_hdr_len += ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + eth_type = eh->evl_encap_proto; + eth_hdr_len += ETHER_HDR_LEN; + } + + /* Check for IP header again. 
*/ + switch (eth_type) { + case htons(ETHERTYPE_IP): + ip4 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + sizeof(*ip4)); + if (unlikely(ip4 == NULL)) + return (0); + wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2; + wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + ip_type = ip4->ip_p; + ip_hlen = ip4->ip_hl << 2; + eth_hdr_len += ip_hlen; + break; + case htons(ETHERTYPE_IPV6): + ip6 = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + sizeof(*ip6)); + if (unlikely(ip6 == NULL)) + return (0); + wqe->eth.swp_inner_l3_offset = eth_hdr_len / 2; + wqe->eth.cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_TYPE; + ip_type = ip6->ip6_nxt; + eth_hdr_len += sizeof(*ip6); + break; + default: + return (0); + } + + /* + * If the hardware is not computing inner UDP/TCP checksum, + * then skip inlining the inner UDP/TCP header: + */ + if (unlikely((cs_mask & MLX5_ETH_WQE_L4_INNER_CSUM) == 0)) + goto done; + + switch (ip_type) { + case IPPROTO_UDP: + udp = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + sizeof(*udp)); + if (unlikely(udp == NULL)) + return (0); + wqe->eth.swp_inner_l4_offset = (eth_hdr_len / 2); + wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE; + eth_hdr_len += sizeof(*udp); + break; + case IPPROTO_TCP: + th = mlx5e_parse_mbuf_chain(&mb, &offset, eth_hdr_len, + sizeof(*th)); + if (unlikely(th == NULL)) + return (0); + wqe->eth.swp_inner_l4_offset = eth_hdr_len / 2; + wqe->eth.cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + wqe->eth.swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_TYPE; + tcp_hlen = th->th_off << 2; + eth_hdr_len += tcp_hlen; + break; + default: + return (0); + } +done: + if (unlikely(pkt_hdr_len < eth_hdr_len)) + return (0); + + /* Account for software inserted VLAN tag, if any. 
*/ + if (unlikely(has_outer_vlan_tag)) { + wqe->eth.swp_outer_l3_offset += ETHER_VLAN_ENCAP_LEN / 2; + wqe->eth.swp_outer_l4_offset += ETHER_VLAN_ENCAP_LEN / 2; + wqe->eth.swp_inner_l3_offset += ETHER_VLAN_ENCAP_LEN / 2; + wqe->eth.swp_inner_l4_offset += ETHER_VLAN_ENCAP_LEN / 2; + } + + /* + * When inner checksums are set, outer L4 checksum flag must + * be disabled. + */ + if (wqe->eth.cs_flags & (MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM)) + wqe->eth.cs_flags &= ~MLX5_ETH_WQE_L4_CSUM; + + return (eth_hdr_len); +} + struct mlx5_wqe_dump_seg { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_data_seg data; @@ -574,8 +789,77 @@ top: num_pkts = DIV_ROUND_UP(payload_len, mss); sq->mbuf[pi].num_bytes = payload_len + (num_pkts * args.ihs); + sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; + } else if (mb->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) { + /* check for inner TCP TSO first */ + if (mb->m_pkthdr.csum_flags & (CSUM_INNER_IP_TSO | + CSUM_INNER_IP6_TSO)) { + u32 payload_len; + u32 mss = mb->m_pkthdr.tso_segsz; + u32 num_pkts; + + wqe->eth.mss = cpu_to_be16(mss); + opcode = MLX5_OPCODE_LSO; + + if (likely(args.ihs == 0)) { + args.ihs = mlx5e_get_vxlan_header_size(mb, wqe, + MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM | + MLX5_ETH_WQE_L4_CSUM | + MLX5_ETH_WQE_L3_CSUM, + opcode); + if (unlikely(args.ihs == 0)) { + err = EINVAL; + goto tx_drop; + } + } + + payload_len = mb->m_pkthdr.len - args.ihs; + if (payload_len == 0) + num_pkts = 1; + else + num_pkts = DIV_ROUND_UP(payload_len, mss); + sq->mbuf[pi].num_bytes = payload_len + + num_pkts * args.ihs; + + sq->stats.tso_packets++; + sq->stats.tso_bytes += payload_len; + } else { + opcode = MLX5_OPCODE_SEND; + + if (likely(args.ihs == 0)) { + uint8_t cs_mask; + + if (mb->m_pkthdr.csum_flags & + (CSUM_INNER_IP_TCP | CSUM_INNER_IP_UDP)) { + cs_mask = + MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM | + MLX5_ETH_WQE_L4_CSUM | + MLX5_ETH_WQE_L3_CSUM; + } else if 
(mb->m_pkthdr.csum_flags & CSUM_INNER_IP) { + cs_mask = + MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_CSUM | + MLX5_ETH_WQE_L3_CSUM; + } else { + cs_mask = + MLX5_ETH_WQE_L4_CSUM | + MLX5_ETH_WQE_L3_CSUM; + } + args.ihs = mlx5e_get_vxlan_header_size(mb, wqe, + cs_mask, opcode); + if (unlikely(args.ihs == 0)) { + err = EINVAL; + goto tx_drop; + } + } + + sq->mbuf[pi].num_bytes = max_t (unsigned int, + mb->m_pkthdr.len, ETHER_MIN_LEN - ETHER_CRC_LEN); + } } else { opcode = MLX5_OPCODE_SEND; @@ -622,7 +906,7 @@ top: /* Range checks */ if (unlikely(args.ihs > (sq->max_inline - ETHER_VLAN_ENCAP_LEN))) { - if (mb->m_pkthdr.csum_flags & CSUM_TSO) { + if (mb->m_pkthdr.csum_flags & (CSUM_TSO | CSUM_ENCAP_VXLAN)) { err = EINVAL; goto tx_drop; } @@ -646,7 +930,8 @@ top: } else { /* check if inline header size is too big */ if (unlikely(args.ihs > sq->max_inline)) { - if (unlikely(mb->m_pkthdr.csum_flags & CSUM_TSO)) { + if (unlikely(mb->m_pkthdr.csum_flags & (CSUM_TSO | + CSUM_ENCAP_VXLAN))) { err = EINVAL; goto tx_drop; }