From owner-svn-src-all@freebsd.org  Thu Dec  3 14:56:18 2015
Message-Id: <201512031456.tB3EuHVs030967@repo.freebsd.org>
From: Hans Petter Selasky <hselasky@FreeBSD.org>
Date: Thu, 3 Dec 2015 14:56:17 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r291699 - in head/sys: modules/mlxen ofed/drivers/net/mlx4
X-SVN-Group: head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Author: hselasky
Date: Thu Dec  3 14:56:17 2015
New Revision: 291699
URL: https://svnweb.freebsd.org/changeset/base/291699

Log:
  Convert the mlxen driver to use the BUSDMA(9) APIs instead of vtophys()
  when loading mbufs for transmission and reception.  While at it, all
  pointer-arithmetic and cast-qualifier issues were fixed, mostly in the
  transmit and receive paths, which allows the -Wno-cast-qual and
  -Wno-pointer-arith workarounds to be dropped from the module Makefile.

  MFC after:	1 week
  Sponsored by:	Mellanox Technologies
  Differential Revision:	https://reviews.freebsd.org/D4284
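For context, the core of the change is replacing vtophys()-style address
translation with BUSDMA(9) tags and maps.  A minimal sketch of that pattern
for a single-segment buffer slot follows; the names mlx_dma_slot,
mlx_dma_setup and mlx_dma_load_mbuf are illustrative only and not part of
this commit, and error handling is reduced to the essentials.

/*
 * Minimal BUSDMA(9) sketch (illustration only): map one mbuf for device
 * access through a DMA tag and map instead of calling vtophys().
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/mbuf.h>
#include <machine/bus.h>

struct mlx_dma_slot {
	bus_dma_tag_t	tag;	/* alignment/size constraints for this buffer type */
	bus_dmamap_t	map;	/* per-buffer mapping state */
};

static int
mlx_dma_setup(device_t dev, struct mlx_dma_slot *slot, bus_size_t maxsize)
{
	int err;

	/* One tag describes the constraints shared by all buffers of a ring. */
	err = bus_dma_tag_create(bus_get_dma_tag(dev),
	    1, 0,			/* alignment, boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    maxsize, 1, maxsize,	/* maxsize, nsegments, maxsegsize */
	    0, NULL, NULL,		/* flags, lockfunc, lockfuncarg */
	    &slot->tag);
	if (err != 0)
		return (err);

	/* One map per buffer slot in the ring. */
	err = bus_dmamap_create(slot->tag, 0, &slot->map);
	if (err != 0)
		bus_dma_tag_destroy(slot->tag);
	return (err);
}

static int
mlx_dma_load_mbuf(struct mlx_dma_slot *slot, struct mbuf *mb, bus_addr_t *paddr)
{
	bus_dma_segment_t segs[1];
	int nsegs;
	int err;

	/* Translate the mbuf into a bus address; this replaces vtophys(). */
	err = bus_dmamap_load_mbuf_sg(slot->tag, slot->map, mb,
	    segs, &nsegs, BUS_DMA_NOWAIT);
	if (err != 0)
		return (err);

	/* Make the buffer visible to the device before posting it. */
	bus_dmamap_sync(slot->tag, slot->map, BUS_DMASYNC_PREREAD);
	*paddr = segs[0].ds_addr;
	return (0);
}

The diff below applies this pattern per ring: one tag per RX/TX ring, one
map per ring entry (plus a spare on the RX side), with the matching
bus_dmamap_sync() and bus_dmamap_unload() calls added at completion time.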
Modified:
  head/sys/modules/mlxen/Makefile
  head/sys/ofed/drivers/net/mlx4/en_netdev.c
  head/sys/ofed/drivers/net/mlx4/en_rx.c
  head/sys/ofed/drivers/net/mlx4/en_tx.c
  head/sys/ofed/drivers/net/mlx4/mlx4_en.h

Modified: head/sys/modules/mlxen/Makefile
==============================================================================
--- head/sys/modules/mlxen/Makefile	Thu Dec  3 14:38:55 2015	(r291698)
+++ head/sys/modules/mlxen/Makefile	Thu Dec  3 14:56:17 2015	(r291699)
@@ -11,5 +11,3 @@ CFLAGS+= -I${.CURDIR}/../../ofed/include
 CFLAGS+= -I${.CURDIR}/../../compat/linuxkpi/common/include
 
 .include <bsd.kmod.mk>
-
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith

Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c
==============================================================================
--- head/sys/ofed/drivers/net/mlx4/en_netdev.c	Thu Dec  3 14:38:55 2015	(r291698)
+++ head/sys/ofed/drivers/net/mlx4/en_netdev.c	Thu Dec  3 14:56:17 2015	(r291699)
@@ -1247,7 +1247,6 @@ int mlx4_en_start_port(struct net_device
 	    PAGE_SIZE);
 	priv->rx_alloc_order = get_order(priv->rx_alloc_size);
 	priv->rx_buf_size = roundup_pow_of_two(priv->rx_mb_size);
-	priv->log_rx_info = ROUNDUP_LOG2(sizeof(struct mlx4_en_rx_buf));
 	en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size);
 
 	/* Configure rx cq's and rings */
@@ -2091,8 +2090,6 @@ int mlx4_en_init_netdev(struct mlx4_en_d
 	priv->port = port;
 	priv->port_up = false;
 	priv->flags = prof->flags;
-	priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
-	    MLX4_WQE_CTRL_SOLICITED);
 	priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up;
 	priv->tx_ring_num = prof->tx_ring_num;
 
@@ -2108,7 +2105,7 @@ int mlx4_en_init_netdev(struct mlx4_en_d
 		err = -ENOMEM;
 		goto out;
 	}
-
+
 	priv->rx_ring_num = prof->rx_ring_num;
 	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ?
1 : 0; priv->mac_index = -1; Modified: head/sys/ofed/drivers/net/mlx4/en_rx.c ============================================================================== --- head/sys/ofed/drivers/net/mlx4/en_rx.c Thu Dec 3 14:38:55 2015 (r291698) +++ head/sys/ofed/drivers/net/mlx4/en_rx.c Thu Dec 3 14:56:17 2015 (r291699) @@ -54,104 +54,133 @@ static void mlx4_en_init_rx_desc(struct int possible_frags; int i; - /* Set size and memtype fields */ - for (i = 0; i < priv->num_frags; i++) { - rx_desc->data[i].byte_count = - cpu_to_be32(priv->frag_info[i].frag_size); - rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); - } + rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size); + rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key); - /* If the number of used fragments does not fill up the ring stride, - * * remaining (unused) fragments must be padded with null address/size - * * and a special memory key */ + /* + * If the number of used fragments does not fill up the ring + * stride, remaining (unused) fragments must be padded with + * null address/size and a special memory key: + */ possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE; - for (i = priv->num_frags; i < possible_frags; i++) { + for (i = 1; i < possible_frags; i++) { rx_desc->data[i].byte_count = 0; rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD); rx_desc->data[i].addr = 0; } - } -static int mlx4_en_alloc_buf(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, - struct mbuf **mb_list, - int i) +static int +mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring, + __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list) { - struct mlx4_en_dev *mdev = priv->mdev; - struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; + bus_dma_segment_t segs[1]; + bus_dmamap_t map; struct mbuf *mb; - dma_addr_t dma; + int nsegs; + int err; - if (i == 0) - mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, frag_info->frag_size); - else - mb = m_getjcl(M_NOWAIT, MT_DATA, 0, frag_info->frag_size); - if (mb == NULL) { - priv->port_stats.rx_alloc_failed++; - return -ENOMEM; + /* try to allocate a new spare mbuf */ + if (unlikely(ring->spare.mbuf == NULL)) { + mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size); + if (unlikely(mb == NULL)) + return (-ENOMEM); + /* setup correct length */ + mb->m_len = ring->rx_mb_size; + + /* load spare mbuf into BUSDMA */ + err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map, + mb, segs, &nsegs, BUS_DMA_NOWAIT); + if (unlikely(err != 0)) { + m_freem(mb); + return (err); + } + KASSERT(nsegs == 1, + ("Number of segments is expected to be one")); + + /* store spare info */ + ring->spare.mbuf = mb; + ring->spare.paddr_be = cpu_to_be64(segs[0].ds_addr); + + bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map, + BUS_DMASYNC_PREREAD); } - dma = pci_map_single(mdev->pdev, mb->m_data, frag_info->frag_size, - PCI_DMA_FROMDEVICE); - rx_desc->data[i].addr = cpu_to_be64(dma); - mb_list[i] = mb; - return 0; -} + /* synchronize and unload the current mbuf, if any */ + if (likely(mb_list->mbuf != NULL)) { + bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(ring->dma_tag, mb_list->dma_map); + } -static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring, int index) -{ - struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *) - (ring->buf + (index * ring->stride)); - struct mbuf **mb_list = ring->rx_info + (index << priv->log_rx_info); - int i; + mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size); + if 
(unlikely(mb == NULL)) + goto use_spare; - for (i = 0; i < priv->num_frags; i++) - if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, i)) - goto err; + /* setup correct length */ + mb->m_len = ring->rx_mb_size; + + err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, mb_list->dma_map, + mb, segs, &nsegs, BUS_DMA_NOWAIT); + if (unlikely(err != 0)) { + m_freem(mb); + goto use_spare; + } + KASSERT(nsegs == 1, ("Number of segments is expected to be one")); - return 0; + *pdma = cpu_to_be64(segs[0].ds_addr); + mb_list->mbuf = mb; -err: - while (i--) - m_free(mb_list[i]); - return -ENOMEM; + bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD); + return (0); + +use_spare: + /* swap DMA maps */ + map = mb_list->dma_map; + mb_list->dma_map = ring->spare.dma_map; + ring->spare.dma_map = map; + + /* swap MBUFs */ + mb_list->mbuf = ring->spare.mbuf; + ring->spare.mbuf = NULL; + + /* store physical address */ + *pdma = ring->spare.paddr_be; + return (0); } -static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) +static void +mlx4_en_free_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_mbuf *mb_list) { - *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); + bus_dmamap_t map = mb_list->dma_map; + bus_dmamap_sync(ring->dma_tag, map, BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(ring->dma_tag, map); + m_freem(mb_list->mbuf); + mb_list->mbuf = NULL; /* safety clearing */ } -static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring, - int index) +static int +mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, int index) { - struct mlx4_en_frag_info *frag_info; - struct mlx4_en_dev *mdev = priv->mdev; - struct mbuf **mb_list; struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *) - (ring->buf + (index << ring->log_stride)); - dma_addr_t dma; - int nr; - - mb_list = ring->rx_info + (index << priv->log_rx_info); - for (nr = 0; nr < priv->num_frags; nr++) { - en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); - frag_info = &priv->frag_info[nr]; - dma = be64_to_cpu(rx_desc->data[nr].addr); - -#if BITS_PER_LONG == 64 - en_dbg(DRV, priv, "Unmaping buffer at dma:0x%lx\n", (u64) dma); -#elif BITS_PER_LONG == 32 - en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma); -#endif - pci_unmap_single(mdev->pdev, dma, frag_info->frag_size, - PCI_DMA_FROMDEVICE); - m_free(mb_list[nr]); + (ring->buf + (index * ring->stride)); + struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index; + + mb_list->mbuf = NULL; + + if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) { + priv->port_stats.rx_alloc_failed++; + return (-ENOMEM); } + return (0); +} + +static inline void +mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) +{ + *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); } static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv) @@ -194,7 +223,8 @@ reduce_rings: while (ring->actual_size > new_size) { ring->actual_size--; ring->prod--; - mlx4_en_free_rx_desc(priv, ring, ring->actual_size); + mlx4_en_free_buf(ring, + ring->mbuf + ring->actual_size); } } @@ -214,100 +244,106 @@ static void mlx4_en_free_rx_buf(struct m while (ring->cons != ring->prod) { index = ring->cons & ring->size_mask; en_dbg(DRV, priv, "Processing descriptor:%d\n", index); - mlx4_en_free_rx_desc(priv, ring, index); + mlx4_en_free_buf(ring, ring->mbuf + index); ++ring->cons; } } -#if MLX4_EN_MAX_RX_FRAGS == 3 -static int frag_sizes[] = { - FRAG_SZ0, - FRAG_SZ1, - FRAG_SZ2, -}; -#elif MLX4_EN_MAX_RX_FRAGS == 2 -static int frag_sizes[] = 
{ - FRAG_SZ0, - FRAG_SZ1, -}; -#else -#error "Unknown MAX_RX_FRAGS" -#endif - void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; - int buf_size = 0; - int i, frag; - for (i = 0, frag = 0; buf_size < eff_mtu; frag++, i++) { - /* - * Allocate small to large but only as much as is needed for - * the tail. - */ - while (i > 0 && eff_mtu - buf_size <= frag_sizes[i - 1]) - i--; - priv->frag_info[frag].frag_size = frag_sizes[i]; - priv->frag_info[frag].frag_prefix_size = buf_size; - buf_size += priv->frag_info[frag].frag_size; - } + if (eff_mtu > MJUM16BYTES) { + en_err(priv, "MTU(%d) is too big\n", dev->if_mtu); + eff_mtu = MJUM16BYTES; + } else if (eff_mtu > MJUM9BYTES) { + eff_mtu = MJUM16BYTES; + } else if (eff_mtu > MJUMPAGESIZE) { + eff_mtu = MJUM9BYTES; + } else if (eff_mtu > MCLBYTES) { + eff_mtu = MJUMPAGESIZE; + } else { + eff_mtu = MCLBYTES; + } - priv->num_frags = frag; priv->rx_mb_size = eff_mtu; - priv->log_rx_info = - ROUNDUP_LOG2(priv->num_frags * sizeof(struct mbuf *)); - en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d " - "num_frags:%d):\n", eff_mtu, priv->num_frags); - for (i = 0; i < priv->num_frags; i++) { - en_dbg(DRV, priv, " frag:%d - size:%d prefix:%d\n", i, - priv->frag_info[i].frag_size, - priv->frag_info[i].frag_prefix_size); - } + en_dbg(DRV, priv, "Effective RX MTU: %d bytes\n", eff_mtu); } - int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, int node) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring; - int err = -ENOMEM; + int err; int tmp; + uint32_t x; ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL); if (!ring) { en_err(priv, "Failed to allocate RX ring structure\n"); return -ENOMEM; } - + + /* Create DMA descriptor TAG */ + if ((err = -bus_dma_tag_create( + bus_get_dma_tag(mdev->pdev->dev.bsddev), + 1, /* any alignment */ + 0, /* no boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MJUM16BYTES, /* maxsize */ + 1, /* nsegments */ + MJUM16BYTES, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockfuncarg */ + &ring->dma_tag))) { + en_err(priv, "Failed to create DMA tag\n"); + goto err_ring; + } + ring->prod = 0; ring->cons = 0; ring->size = size; ring->size_mask = size - 1; - ring->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + - DS_SIZE * MLX4_EN_MAX_RX_FRAGS); + ring->stride = roundup_pow_of_two( + sizeof(struct mlx4_en_rx_desc) + DS_SIZE); ring->log_stride = ffs(ring->stride) - 1; ring->buf_size = ring->size * ring->stride + TXBB_SIZE; - tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * - sizeof(struct mbuf *)); + tmp = size * sizeof(struct mlx4_en_rx_mbuf); - ring->rx_info = kmalloc(tmp, GFP_KERNEL); - if (!ring->rx_info) { + ring->mbuf = kzalloc(tmp, GFP_KERNEL); + if (ring->mbuf == NULL) { err = -ENOMEM; - goto err_ring; + goto err_dma_tag; } - en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n", - ring->rx_info, tmp); + err = -bus_dmamap_create(ring->dma_tag, 0, &ring->spare.dma_map); + if (err != 0) + goto err_info; + + for (x = 0; x != size; x++) { + err = -bus_dmamap_create(ring->dma_tag, 0, + &ring->mbuf[x].dma_map); + if (err != 0) { + while (x--) + bus_dmamap_destroy(ring->dma_tag, + ring->mbuf[x].dma_map); + goto err_info; + } + } + en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%d\n", + ring->mbuf, tmp); err = 
mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); if (err) - goto err_info; + goto err_dma_map; err = mlx4_en_map_buffer(&ring->wqres.buf); if (err) { @@ -320,23 +356,29 @@ int mlx4_en_create_rx_ring(struct mlx4_e err_hwq: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); +err_dma_map: + for (x = 0; x != size; x++) { + bus_dmamap_destroy(ring->dma_tag, + ring->mbuf[x].dma_map); + } + bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map); err_info: - vfree(ring->rx_info); + vfree(ring->mbuf); +err_dma_tag: + bus_dma_tag_destroy(ring->dma_tag); err_ring: kfree(ring); - - return err; + return (err); } - int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) { struct mlx4_en_rx_ring *ring; int i; int ring_ind; int err; - int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + - DS_SIZE * priv->num_frags); + int stride = roundup_pow_of_two( + sizeof(struct mlx4_en_rx_desc) + DS_SIZE); for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) { ring = priv->rx_ring[ring_ind]; @@ -412,10 +454,22 @@ void mlx4_en_destroy_rx_ring(struct mlx4 { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; + uint32_t x; mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); - vfree(ring->rx_info); + for (x = 0; x != size; x++) + bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map); + /* free spare mbuf, if any */ + if (ring->spare.mbuf != NULL) { + bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map, + BUS_DMASYNC_POSTREAD); + bus_dmamap_unload(ring->dma_tag, ring->spare.dma_map); + m_freem(ring->spare.mbuf); + } + bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map); + vfree(ring->mbuf); + bus_dma_tag_destroy(ring->dma_tag); kfree(ring); *pring = NULL; #ifdef CONFIG_RFS_ACCEL @@ -423,7 +477,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4 #endif } - void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { @@ -472,69 +525,27 @@ static inline int invalid_cqe(struct mlx return 0; } - -/* Unmap a completed descriptor and free unused pages */ -static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, - struct mbuf **mb_list, - int length) -{ - struct mlx4_en_dev *mdev = priv->mdev; - struct mlx4_en_frag_info *frag_info; - dma_addr_t dma; - struct mbuf *mb; - int nr; - - mb = mb_list[0]; - mb->m_pkthdr.len = length; - /* Collect used fragments while replacing them in the HW descirptors */ - for (nr = 0; nr < priv->num_frags; nr++) { - frag_info = &priv->frag_info[nr]; - if (length <= frag_info->frag_prefix_size) - break; - if (nr) - mb->m_next = mb_list[nr]; - mb = mb_list[nr]; - mb->m_len = frag_info->frag_size; - dma = be64_to_cpu(rx_desc->data[nr].addr); - - /* Allocate a replacement page */ - if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, nr)) - goto fail; - - /* Unmap buffer */ - pci_unmap_single(mdev->pdev, dma, frag_info->frag_size, - PCI_DMA_FROMDEVICE); - } - /* Adjust size of last fragment to match actual length */ - mb->m_len = length - priv->frag_info[nr - 1].frag_prefix_size; - mb->m_next = NULL; - return 0; - -fail: - /* Drop all accumulated fragments (which have already been replaced in - * the descriptor) of this packet; remaining fragments are reused... 
*/ - while (nr > 0) { - nr--; - m_free(mb_list[nr]); - } - return -ENOMEM; - -} - -static struct mbuf *mlx4_en_rx_mb(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, - struct mbuf **mb_list, - unsigned int length) +static struct mbuf * +mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list, + int length) { struct mbuf *mb; - mb = mb_list[0]; - /* Move relevant fragments to mb */ - if (unlikely(mlx4_en_complete_rx_desc(priv, rx_desc, mb_list, length))) - return NULL; + /* get mbuf */ + mb = mb_list->mbuf; - return mb; + /* collect used fragment while atomically replacing it */ + if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) + return (NULL); + + /* range check hardware computed value */ + if (unlikely(length > mb->m_len)) + length = mb->m_len; + + /* update total packet length in packet header */ + mb->m_len = mb->m_pkthdr.len = length; + return (mb); } /* For cpu arch with cache line of 64B the performance is better when cqe size==64B @@ -548,7 +559,7 @@ int mlx4_en_process_rx_cq(struct net_dev struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; - struct mbuf **mb_list; + struct mlx4_en_rx_mbuf *mb_list; struct mlx4_en_rx_desc *rx_desc; struct mbuf *mb; struct mlx4_cq *mcq = &cq->mcq; @@ -576,7 +587,7 @@ int mlx4_en_process_rx_cq(struct net_dev /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cons_index & size)) { - mb_list = ring->rx_info + (index << priv->log_rx_info); + mb_list = ring->mbuf + index; rx_desc = (struct mlx4_en_rx_desc *) (ring->buf + (index << ring->log_stride)); @@ -593,8 +604,9 @@ int mlx4_en_process_rx_cq(struct net_dev */ length = be32_to_cpu(cqe->byte_cnt); length -= ring->fcs_del; - mb = mlx4_en_rx_mb(priv, rx_desc, mb_list, length); - if (!mb) { + + mb = mlx4_en_rx_mb(priv, ring, rx_desc, mb_list, length); + if (unlikely(!mb)) { ring->errors++; goto next; } Modified: head/sys/ofed/drivers/net/mlx4/en_tx.c ============================================================================== --- head/sys/ofed/drivers/net/mlx4/en_tx.c Thu Dec 3 14:38:55 2015 (r291698) +++ head/sys/ofed/drivers/net/mlx4/en_tx.c Thu Dec 3 14:56:17 2015 (r291699) @@ -67,6 +67,7 @@ int mlx4_en_create_tx_ring(struct mlx4_e { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring; + uint32_t x; int tmp; int err; @@ -79,11 +80,26 @@ int mlx4_en_create_tx_ring(struct mlx4_e } } + /* Create DMA descriptor TAG */ + if ((err = -bus_dma_tag_create( + bus_get_dma_tag(mdev->pdev->dev.bsddev), + 1, /* any alignment */ + 0, /* no boundary */ + BUS_SPACE_MAXADDR, /* lowaddr */ + BUS_SPACE_MAXADDR, /* highaddr */ + NULL, NULL, /* filter, filterarg */ + MLX4_EN_TX_MAX_PAYLOAD_SIZE, /* maxsize */ + MLX4_EN_TX_MAX_MBUF_FRAGS, /* nsegments */ + MLX4_EN_TX_MAX_MBUF_SIZE, /* maxsegsize */ + 0, /* flags */ + NULL, NULL, /* lockfunc, lockfuncarg */ + &ring->dma_tag))) + goto done; + ring->size = size; ring->size_mask = size - 1; ring->stride = stride; - ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; - ring->inline_thold = min(inline_thold, MAX_INLINE); + ring->inline_thold = MAX(MIN_PKT_LEN, MIN(inline_thold, MAX_INLINE)); mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF); mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF); @@ -92,30 +108,36 @@ int mlx4_en_create_tx_ring(struct mlx4_e M_WAITOK, &ring->tx_lock.m); if (ring->br == NULL) { en_err(priv, "Failed 
allocating tx_info ring\n"); - return -ENOMEM; + err = -ENOMEM; + goto err_free_dma_tag; } tmp = size * sizeof(struct mlx4_en_tx_info); - ring->tx_info = vmalloc_node(tmp, node); + ring->tx_info = kzalloc_node(tmp, GFP_KERNEL, node); if (!ring->tx_info) { - ring->tx_info = vmalloc(tmp); + ring->tx_info = kzalloc(tmp, GFP_KERNEL); if (!ring->tx_info) { err = -ENOMEM; goto err_ring; } } - en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", - ring->tx_info, tmp); - - ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node); - if (!ring->bounce_buf) { - ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); - if (!ring->bounce_buf) { - err = -ENOMEM; + /* Create DMA descriptor MAPs */ + for (x = 0; x != size; x++) { + err = -bus_dmamap_create(ring->dma_tag, 0, + &ring->tx_info[x].dma_map); + if (err != 0) { + while (x--) { + bus_dmamap_destroy(ring->dma_tag, + ring->tx_info[x].dma_map); + } goto err_info; } } + + en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", + ring->tx_info, tmp); + ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); /* Allocate HW buffers on provided NUMA node */ @@ -123,7 +145,7 @@ int mlx4_en_create_tx_ring(struct mlx4_e 2 * PAGE_SIZE); if (err) { en_err(priv, "Failed allocating hwq resources\n"); - goto err_bounce; + goto err_dma_map; } err = mlx4_en_map_buffer(&ring->wqres.buf); @@ -173,12 +195,16 @@ err_map: mlx4_en_unmap_buffer(&ring->wqres.buf); err_hwq_res: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); -err_bounce: - kfree(ring->bounce_buf); +err_dma_map: + for (x = 0; x != size; x++) + bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map); err_info: vfree(ring->tx_info); err_ring: buf_ring_free(ring->br, M_DEVBUF); +err_free_dma_tag: + bus_dma_tag_destroy(ring->dma_tag); +done: kfree(ring); return err; } @@ -188,6 +214,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4 { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_ring *ring = *pring; + uint32_t x; en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn); buf_ring_free(ring->br, M_DEVBUF); @@ -198,10 +225,12 @@ void mlx4_en_destroy_tx_ring(struct mlx4 mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); - kfree(ring->bounce_buf); + for (x = 0; x != ring->size; x++) + bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map); vfree(ring->tx_info); mtx_destroy(&ring->tx_lock.m); mtx_destroy(&ring->comp_lock.m); + bus_dma_tag_destroy(ring->dma_tag); kfree(ring); *pring = NULL; } @@ -219,7 +248,6 @@ int mlx4_en_activate_tx_ring(struct mlx4 ring->last_nr_txbb = 1; ring->poll_cnt = 0; ring->blocked = 0; - memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); memset(ring->buf, 0, ring->buf_size); ring->qp_state = MLX4_QP_STATE_RST; @@ -244,99 +272,63 @@ void mlx4_en_deactivate_tx_ring(struct m MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); } -static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, - int index, u8 owner) +static volatile struct mlx4_wqe_data_seg * +mlx4_en_store_inline_lso_data(volatile struct mlx4_wqe_data_seg *dseg, + struct mbuf *mb, int len, __be32 owner_bit) { - struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; - struct mlx4_en_tx_desc *tx_desc = (struct mlx4_en_tx_desc *) - (ring->buf + index * TXBB_SIZE); - void *end = ring->buf + ring->buf_size; - __be32 *ptr = (__be32 *)tx_desc; - __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); - int i; - - /* 
Optimize the common case when there are no wraparounds */ - if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) - /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { - *ptr = stamp; - ptr += STAMP_DWORDS; - } - else - /* Stamp the freed descriptor */ - for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { - *ptr = stamp; - ptr += STAMP_DWORDS; - if ((void *)ptr >= end) { - ptr = (__be32 *)ring->buf; - stamp ^= cpu_to_be32(0x80000000); - } - } + uint8_t *inl = __DEVOLATILE(uint8_t *, dseg); + + /* copy data into place */ + m_copydata(mb, 0, len, inl + 4); + dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT); + return (dseg); } -static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp) +static void +mlx4_en_store_inline_lso_header(volatile struct mlx4_wqe_data_seg *dseg, + int len, __be32 owner_bit) +{ +} + +static void +mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, u32 index, u8 owner) { - struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = (struct mlx4_en_tx_desc *) - (ring->buf + index * TXBB_SIZE); - struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; - struct mbuf *mb = tx_info->mb; - void *end = ring->buf + ring->buf_size; - int frags = tx_info->nr_segs;; - int i; - - /* Optimize the common case when there are no wraparounds */ - if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { - if (!tx_info->inl) { - if (tx_info->linear) { - dma_unmap_single(priv->ddev, - (dma_addr_t) be64_to_cpu(data->addr), - be32_to_cpu(data->byte_count), - PCI_DMA_TODEVICE); - ++data; - } + (ring->buf + (index * TXBB_SIZE)); + volatile __be32 *ptr = (__be32 *)tx_desc; + const __be32 stamp = cpu_to_be32(STAMP_VAL | + ((u32)owner << STAMP_SHIFT)); + u32 i; + + /* Stamp the freed descriptor */ + for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { + *ptr = stamp; + ptr += STAMP_DWORDS; + } +} - for (i = 0; i < frags; i++) { - pci_unmap_single(mdev->pdev, - (dma_addr_t) be64_to_cpu(data[i].addr), - data[i].byte_count, PCI_DMA_TODEVICE); - } - } - } else { - if (!tx_info->inl) { - if ((void *) data >= end) { - data = (struct mlx4_wqe_data_seg *) - (ring->buf + ((void *)data - end)); - } +static u32 +mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, u32 index) +{ + struct mlx4_en_tx_info *tx_info; + struct mbuf *mb; - if (tx_info->linear) { - dma_unmap_single(priv->ddev, - (dma_addr_t) be64_to_cpu(data->addr), - be32_to_cpu(data->byte_count), - PCI_DMA_TODEVICE); - ++data; - } + tx_info = &ring->tx_info[index]; + mb = tx_info->mb; + + if (mb == NULL) + goto done; + + bus_dmamap_sync(ring->dma_tag, tx_info->dma_map, + BUS_DMASYNC_POSTWRITE); + bus_dmamap_unload(ring->dma_tag, tx_info->dma_map); - for (i = 0; i < frags; i++) { - /* Check for wraparound before unmapping */ - if ((void *) data >= end) - data = (struct mlx4_wqe_data_seg *)ring->buf; - pci_unmap_single(mdev->pdev, - (dma_addr_t) be64_to_cpu(data->addr), - data->byte_count, PCI_DMA_TODEVICE); - ++data; - } - } - } - /* Send a copy of the frame to the BPF listener */ - if (priv->dev && priv->dev->if_bpf) - ETHER_BPF_MTAP(priv->dev, mb); m_freem(mb); - return tx_info->nr_txbb; +done: + return (tx_info->nr_txbb); } int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) @@ -356,8 +348,7 @@ int 
mlx4_en_free_tx_buf(struct net_devic while (ring->cons != ring->prod) { ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, - ring->cons & ring->size_mask, - !!(ring->cons & ring->size), 0); + ring->cons & ring->size_mask); ring->cons += ring->last_nr_txbb; cnt++; } @@ -368,6 +359,14 @@ int mlx4_en_free_tx_buf(struct net_devic return cnt; } +static bool +mlx4_en_tx_ring_is_full(struct mlx4_en_tx_ring *ring) +{ + int wqs; + wqs = ring->size - (ring->prod - ring->cons); + return (wqs < (HEADROOM + (2 * MLX4_EN_TX_WQE_MAX_WQEBBS))); +} + static int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) { @@ -383,12 +382,7 @@ static int mlx4_en_process_tx_cq(struct int size = cq->size; u32 size_mask = ring->size_mask; struct mlx4_cqe *buf = cq->buf; - u32 packets = 0; - u32 bytes = 0; int factor = priv->cqe_factor; - u64 timestamp = 0; - int done = 0; - if (!priv->port_up) return 0; @@ -423,16 +417,12 @@ static int mlx4_en_process_tx_cq(struct ring_index = (ring_index + ring->last_nr_txbb) & size_mask; /* free next descriptor */ ring->last_nr_txbb = mlx4_en_free_tx_desc( - priv, ring, ring_index, - !!((ring->cons + txbbs_skipped) & - ring->size), timestamp); + priv, ring, ring_index); mlx4_en_stamp_wqe(priv, ring, stamp_index, !!((ring->cons + txbbs_stamp) & ring->size)); stamp_index = ring_index; txbbs_stamp = txbbs_skipped; - packets++; - bytes += ring->tx_info[ring_index].nr_bytes; } while (ring_index != new_index); ++cons_index; @@ -451,15 +441,14 @@ static int mlx4_en_process_tx_cq(struct ring->cons += txbbs_skipped; /* Wakeup Tx queue if it was stopped and ring is not full */ - if (unlikely(ring->blocked) && - (ring->prod - ring->cons) <= ring->full_size) { + if (unlikely(ring->blocked) && !mlx4_en_tx_ring_is_full(ring)) { ring->blocked = 0; if (atomic_fetchadd_int(&priv->blocked, -1) == 1) atomic_clear_int(&dev->if_drv_flags ,IFF_DRV_OACTIVE); ring->wake_queue++; priv->port_stats.wake_queue++; } - return done; + return (0); } void mlx4_en_tx_irq(struct mlx4_cq *mcq) @@ -500,34 +489,6 @@ void mlx4_en_poll_tx_cq(unsigned long da spin_unlock(&ring->comp_lock); } -static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, - u32 index, - unsigned int desc_size) -{ - u32 copy = (ring->size - index) * TXBB_SIZE; - int i; - - for (i = desc_size - copy - 4; i >= 0; i -= 4) { - if ((i & (TXBB_SIZE - 1)) == 0) - wmb(); - - *((u32 *) (ring->buf + i)) = - *((u32 *) (ring->bounce_buf + copy + i)); - } - - for (i = copy - 4; i >= 4 ; i -= 4) { - if ((i & (TXBB_SIZE - 1)) == 0) - wmb(); - - *((u32 *) (ring->buf + index * TXBB_SIZE + i)) = - *((u32 *) (ring->bounce_buf + i)); - } - - /* Return real descriptor location */ - return (struct mlx4_en_tx_desc *)(ring->buf + index * TXBB_SIZE); -} - static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) { struct mlx4_en_cq *cq = priv->tx_cq[tx_ind]; @@ -546,30 +507,22 @@ static inline void mlx4_en_xmit_poll(str *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
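One detail of the new RX refill path above worth calling out: mlx4_en_alloc_buf()
keeps a pre-allocated, pre-loaded "spare" mbuf and DMA map per ring, so that when
a replacement buffer cannot be allocated or mapped, the ring slot is repopulated
by swapping DMA maps with the spare instead of being left empty.  A reduced
sketch of that fallback follows; the structure and function names are
hypothetical, not the driver's mlx4_en_rx_ring fields, and slot->map is assumed
to have been unloaded when its previous buffer was handed up the stack.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/mbuf.h>
#include <machine/bus.h>

struct rx_slot {
	struct mbuf	*mbuf;
	bus_dmamap_t	 map;
};

struct rx_ring {
	bus_dma_tag_t	 tag;
	u_int		 mb_size;	/* MCLBYTES ... MJUM16BYTES */
	struct {
		struct mbuf	*mbuf;
		bus_dmamap_t	 map;
		uint64_t	 paddr;
	} spare;
};

static int
rx_refill_slot(struct rx_ring *ring, struct rx_slot *slot, uint64_t *paddr)
{
	bus_dma_segment_t segs[1];
	bus_dmamap_t tmp;
	struct mbuf *mb;
	int nsegs;

	/* The fallback only works while a loaded spare exists; the real
	 * driver replenishes the spare at this point and fails otherwise. */
	if (ring->spare.mbuf == NULL)
		return (ENOMEM);

	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->mb_size);
	if (mb != NULL) {
		mb->m_len = ring->mb_size;
		if (bus_dmamap_load_mbuf_sg(ring->tag, slot->map, mb,
		    segs, &nsegs, BUS_DMA_NOWAIT) == 0) {
			/* Normal case: a fresh buffer was mapped. */
			bus_dmamap_sync(ring->tag, slot->map,
			    BUS_DMASYNC_PREREAD);
			slot->mbuf = mb;
			*paddr = segs[0].ds_addr;
			return (0);
		}
		m_freem(mb);
	}

	/* Fallback: hand the pre-loaded spare buffer to this slot. */
	tmp = slot->map;
	slot->map = ring->spare.map;
	ring->spare.map = tmp;

	slot->mbuf = ring->spare.mbuf;
	ring->spare.mbuf = NULL;		/* replenished on the next call */
	*paddr = ring->spare.paddr;		/* address of the buffer just handed over */
	return (0);
}

Because the spare is replenished first on the next call (as the diff above
does), a transient allocation failure costs at most the reuse of one
pre-mapped buffer; the ring never loses a descriptor to an empty slot.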