Date:      Thu, 3 Dec 2015 14:56:17 +0000 (UTC)
From:      Hans Petter Selasky <hselasky@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r291699 - in head/sys: modules/mlxen ofed/drivers/net/mlx4
Message-ID:  <201512031456.tB3EuHVs030967@repo.freebsd.org>

Author: hselasky
Date: Thu Dec  3 14:56:17 2015
New Revision: 291699
URL: https://svnweb.freebsd.org/changeset/base/291699

Log:
  Convert the mlxen driver to use the BUSDMA(9) APIs instead of
  vtophys() when loading mbufs for transmission and reception. While
  at it, fix all pointer arithmetic and cast qualifier issues, mostly
  in the transmit and receive paths.
  
  MFC after:	1 week
  Sponsored by:	Mellanox Technologies
  Differential Revision:	https://reviews.freebsd.org/D4284

Modified:
  head/sys/modules/mlxen/Makefile
  head/sys/ofed/drivers/net/mlx4/en_netdev.c
  head/sys/ofed/drivers/net/mlx4/en_rx.c
  head/sys/ofed/drivers/net/mlx4/en_tx.c
  head/sys/ofed/drivers/net/mlx4/mlx4_en.h
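
For context before the per-file diffs: the BUSDMA(9) conversion described
in the log follows the usual tag/map/load/sync lifecycle. The sketch below
is illustrative only; the softc layout, function name, and error handling
are assumptions loosely mirroring the diff, not the driver's exact code.

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/bus.h>
	#include <sys/mbuf.h>
	#include <machine/bus.h>

	struct example_softc {
		bus_dma_tag_t	dma_tag;
		bus_dmamap_t	dma_map;
	};

	static int
	example_load_rx_mbuf(struct example_softc *sc, device_t dev,
	    struct mbuf *mb)
	{
		bus_dma_segment_t segs[1];
		int nsegs;
		int err;

		/* One tag per ring: single segment, any alignment. */
		err = bus_dma_tag_create(bus_get_dma_tag(dev),
		    1, 0,			/* alignment, boundary */
		    BUS_SPACE_MAXADDR,		/* lowaddr */
		    BUS_SPACE_MAXADDR,		/* highaddr */
		    NULL, NULL,			/* filter, filterarg */
		    MJUM16BYTES,		/* maxsize */
		    1,				/* nsegments */
		    MJUM16BYTES,		/* maxsegsize */
		    0, NULL, NULL,		/* flags, lockfunc, lockfuncarg */
		    &sc->dma_tag);
		if (err != 0)
			return (err);

		err = bus_dmamap_create(sc->dma_tag, 0, &sc->dma_map);
		if (err != 0)
			return (err);

		/*
		 * Unlike vtophys(), the load returns a bus address that
		 * is also valid behind an IOMMU, and it pairs with
		 * explicit sync operations around device access.
		 */
		err = bus_dmamap_load_mbuf_sg(sc->dma_tag, sc->dma_map, mb,
		    segs, &nsegs, BUS_DMA_NOWAIT);
		if (err != 0)
			return (err);

		bus_dmamap_sync(sc->dma_tag, sc->dma_map, BUS_DMASYNC_PREREAD);
		/* segs[0].ds_addr is the device-visible buffer address. */
		return (0);
	}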

Modified: head/sys/modules/mlxen/Makefile
==============================================================================
--- head/sys/modules/mlxen/Makefile	Thu Dec  3 14:38:55 2015	(r291698)
+++ head/sys/modules/mlxen/Makefile	Thu Dec  3 14:56:17 2015	(r291699)
@@ -11,5 +11,3 @@ CFLAGS+= -I${.CURDIR}/../../ofed/include
 CFLAGS+= -I${.CURDIR}/../../compat/linuxkpi/common/include
 
 .include <bsd.kmod.mk>
-
-CFLAGS+= -Wno-cast-qual -Wno-pointer-arith

Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c
==============================================================================
--- head/sys/ofed/drivers/net/mlx4/en_netdev.c	Thu Dec  3 14:38:55 2015	(r291698)
+++ head/sys/ofed/drivers/net/mlx4/en_netdev.c	Thu Dec  3 14:56:17 2015	(r291699)
@@ -1247,7 +1247,6 @@ int mlx4_en_start_port(struct net_device
 				    PAGE_SIZE);
 	priv->rx_alloc_order = get_order(priv->rx_alloc_size);
 	priv->rx_buf_size = roundup_pow_of_two(priv->rx_mb_size);
-	priv->log_rx_info = ROUNDUP_LOG2(sizeof(struct mlx4_en_rx_buf));
 	en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size);
 
 	/* Configure rx cq's and rings */
@@ -2091,8 +2090,6 @@ int mlx4_en_init_netdev(struct mlx4_en_d
 	priv->port = port;
 	priv->port_up = false;
 	priv->flags = prof->flags;
-        priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
-                        MLX4_WQE_CTRL_SOLICITED);
 
 	priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up;
 	priv->tx_ring_num = prof->tx_ring_num;
@@ -2108,7 +2105,7 @@ int mlx4_en_init_netdev(struct mlx4_en_d
 		err = -ENOMEM;
 		goto out;
 	}
-        
+
 	priv->rx_ring_num = prof->rx_ring_num;
 	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
 	priv->mac_index = -1;

Modified: head/sys/ofed/drivers/net/mlx4/en_rx.c
==============================================================================
--- head/sys/ofed/drivers/net/mlx4/en_rx.c	Thu Dec  3 14:38:55 2015	(r291698)
+++ head/sys/ofed/drivers/net/mlx4/en_rx.c	Thu Dec  3 14:56:17 2015	(r291699)
@@ -54,104 +54,133 @@ static void mlx4_en_init_rx_desc(struct 
 	int possible_frags;
 	int i;
 
-
 	/* Set size and memtype fields */
-	for (i = 0; i < priv->num_frags; i++) {
-		rx_desc->data[i].byte_count =
-			cpu_to_be32(priv->frag_info[i].frag_size);
-		rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
-	}
+	rx_desc->data[0].byte_count = cpu_to_be32(priv->rx_mb_size);
+	rx_desc->data[0].lkey = cpu_to_be32(priv->mdev->mr.key);
 
-	/* If the number of used fragments does not fill up the ring stride,
-	 *          * remaining (unused) fragments must be padded with null address/size
-	 *                   * and a special memory key */
+	/*
+	 * If the number of used fragments does not fill up the ring
+	 * stride, remaining (unused) fragments must be padded with
+	 * null address/size and a special memory key:
+	 */
 	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
-	for (i = priv->num_frags; i < possible_frags; i++) {
+	for (i = 1; i < possible_frags; i++) {
 		rx_desc->data[i].byte_count = 0;
 		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
 		rx_desc->data[i].addr = 0;
 	}
-
 }
 
-static int mlx4_en_alloc_buf(struct mlx4_en_priv *priv,
-			     struct mlx4_en_rx_desc *rx_desc,
-			     struct mbuf **mb_list,
-			     int i)
+static int
+mlx4_en_alloc_buf(struct mlx4_en_rx_ring *ring,
+     __be64 *pdma, struct mlx4_en_rx_mbuf *mb_list)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
-	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
+	bus_dma_segment_t segs[1];
+	bus_dmamap_t map;
 	struct mbuf *mb;
-	dma_addr_t dma;
+	int nsegs;
+	int err;
 
-	if (i == 0)
-		mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, frag_info->frag_size);
-	else
-		mb = m_getjcl(M_NOWAIT, MT_DATA, 0, frag_info->frag_size);
-	if (mb == NULL) {
-		priv->port_stats.rx_alloc_failed++;
-		return -ENOMEM;
+	/* try to allocate a new spare mbuf */
+	if (unlikely(ring->spare.mbuf == NULL)) {
+		mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+		if (unlikely(mb == NULL))
+			return (-ENOMEM);
+		/* setup correct length */
+		mb->m_len = ring->rx_mb_size;
+
+		/* load spare mbuf into BUSDMA */
+		err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, ring->spare.dma_map,
+		    mb, segs, &nsegs, BUS_DMA_NOWAIT);
+		if (unlikely(err != 0)) {
+			m_freem(mb);
+			return (err);
+		}
+		KASSERT(nsegs == 1,
+		    ("Number of segments is expected to be one"));
+
+		/* store spare info */
+		ring->spare.mbuf = mb;
+		ring->spare.paddr_be = cpu_to_be64(segs[0].ds_addr);
+
+		bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
+		    BUS_DMASYNC_PREREAD);
 	}
-	dma = pci_map_single(mdev->pdev, mb->m_data, frag_info->frag_size,
-			     PCI_DMA_FROMDEVICE);
-	rx_desc->data[i].addr = cpu_to_be64(dma);
-	mb_list[i] = mb;
-	return 0;
-}
 
+	/* synchronize and unload the current mbuf, if any */
+	if (likely(mb_list->mbuf != NULL)) {
+		bus_dmamap_sync(ring->dma_tag, mb_list->dma_map,
+		    BUS_DMASYNC_POSTREAD);
+		bus_dmamap_unload(ring->dma_tag, mb_list->dma_map);
+	}
 
-static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
-                                   struct mlx4_en_rx_ring *ring, int index)
-{
-        struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
-		    (ring->buf + (index * ring->stride));
-        struct mbuf **mb_list = ring->rx_info + (index << priv->log_rx_info);
-        int i;
+	mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, ring->rx_mb_size);
+	if (unlikely(mb == NULL))
+		goto use_spare;
 
-        for (i = 0; i < priv->num_frags; i++)
-                if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, i))
-                        goto err;
+	/* setup correct length */
+	mb->m_len = ring->rx_mb_size;
+
+	err = -bus_dmamap_load_mbuf_sg(ring->dma_tag, mb_list->dma_map,
+	    mb, segs, &nsegs, BUS_DMA_NOWAIT);
+	if (unlikely(err != 0)) {
+		m_freem(mb);
+		goto use_spare;
+	}
+	KASSERT(nsegs == 1, ("Number of segments is expected to be one"));
 
-        return 0;
+	*pdma = cpu_to_be64(segs[0].ds_addr);
+	mb_list->mbuf = mb;
 
-err:
-        while (i--)
-                m_free(mb_list[i]);
-        return -ENOMEM;
+	bus_dmamap_sync(ring->dma_tag, mb_list->dma_map, BUS_DMASYNC_PREREAD);
+	return (0);
+
+use_spare:
+	/* swap DMA maps */
+	map = mb_list->dma_map;
+	mb_list->dma_map = ring->spare.dma_map;
+	ring->spare.dma_map = map;
+
+	/* swap MBUFs */
+	mb_list->mbuf = ring->spare.mbuf;
+	ring->spare.mbuf = NULL;
+
+	/* store physical address */
+	*pdma = ring->spare.paddr_be;
+	return (0);
 }
 
-static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
+static void
+mlx4_en_free_buf(struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_mbuf *mb_list)
 {
-	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
+	bus_dmamap_t map = mb_list->dma_map;
+	bus_dmamap_sync(ring->dma_tag, map, BUS_DMASYNC_POSTREAD);
+	bus_dmamap_unload(ring->dma_tag, map);
+	m_freem(mb_list->mbuf);
+	mb_list->mbuf = NULL;	/* safety clearing */
 }
 
-static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
-				 struct mlx4_en_rx_ring *ring,
-				 int index)
+static int
+mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
+    struct mlx4_en_rx_ring *ring, int index)
 {
-	struct mlx4_en_frag_info *frag_info;
-	struct mlx4_en_dev *mdev = priv->mdev;
-	struct mbuf **mb_list;
 	struct mlx4_en_rx_desc *rx_desc = (struct mlx4_en_rx_desc *)
-	    (ring->buf + (index << ring->log_stride));
-	dma_addr_t dma;
-	int nr;
-
-	mb_list = ring->rx_info + (index << priv->log_rx_info);
-	for (nr = 0; nr < priv->num_frags; nr++) {
-		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
- 		frag_info = &priv->frag_info[nr];
-		dma = be64_to_cpu(rx_desc->data[nr].addr);
-
-#if BITS_PER_LONG == 64
-		en_dbg(DRV, priv, "Unmaping buffer at dma:0x%lx\n", (u64) dma);
-#elif BITS_PER_LONG == 32
-                en_dbg(DRV, priv, "Unmaping buffer at dma:0x%llx\n", (u64) dma);
-#endif
-		pci_unmap_single(mdev->pdev, dma, frag_info->frag_size,
-				 PCI_DMA_FROMDEVICE);
-		m_free(mb_list[nr]);
+	    (ring->buf + (index * ring->stride));
+	struct mlx4_en_rx_mbuf *mb_list = ring->mbuf + index;
+
+	mb_list->mbuf = NULL;
+
+	if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list)) {
+		priv->port_stats.rx_alloc_failed++;
+		return (-ENOMEM);
 	}
+	return (0);
+}
+
+static inline void
+mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
+{
+	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
 }
 
 static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
@@ -194,7 +223,8 @@ reduce_rings:
 		while (ring->actual_size > new_size) {
 			ring->actual_size--;
 			ring->prod--;
-			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
+			mlx4_en_free_buf(ring,
+			    ring->mbuf + ring->actual_size);
 		}
 	}
 
@@ -214,100 +244,106 @@ static void mlx4_en_free_rx_buf(struct m
 	while (ring->cons != ring->prod) {
 		index = ring->cons & ring->size_mask;
 		en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
-		mlx4_en_free_rx_desc(priv, ring, index);
+		mlx4_en_free_buf(ring, ring->mbuf + index);
 		++ring->cons;
 	}
 }
 
-#if MLX4_EN_MAX_RX_FRAGS == 3
-static int frag_sizes[] = {
-	FRAG_SZ0,
-	FRAG_SZ1,
-	FRAG_SZ2,
-};
-#elif MLX4_EN_MAX_RX_FRAGS == 2
-static int frag_sizes[] = {
-	FRAG_SZ0,
-	FRAG_SZ1,
-};
-#else
-#error "Unknown MAX_RX_FRAGS"
-#endif
-
 void mlx4_en_calc_rx_buf(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	int eff_mtu = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN;
-	int buf_size = 0;
-	int i, frag;
 
-	for (i = 0, frag = 0; buf_size < eff_mtu; frag++, i++) {
-		/*
-		 * Allocate small to large but only as much as is needed for
-		 * the tail.
-		 */
-		while (i > 0 && eff_mtu - buf_size <= frag_sizes[i - 1])
-			i--;
-		priv->frag_info[frag].frag_size = frag_sizes[i];
-		priv->frag_info[frag].frag_prefix_size = buf_size;
-		buf_size += priv->frag_info[frag].frag_size;
-	}
+	if (eff_mtu > MJUM16BYTES) {
+		en_err(priv, "MTU(%d) is too big\n", dev->if_mtu);
+                eff_mtu = MJUM16BYTES;
+        } else if (eff_mtu > MJUM9BYTES) {
+                eff_mtu = MJUM16BYTES;
+        } else if (eff_mtu > MJUMPAGESIZE) {
+                eff_mtu = MJUM9BYTES;
+        } else if (eff_mtu > MCLBYTES) {
+                eff_mtu = MJUMPAGESIZE;
+        } else {
+                eff_mtu = MCLBYTES;
+        }
 
-	priv->num_frags = frag;
 	priv->rx_mb_size = eff_mtu;
-	priv->log_rx_info =
-	    ROUNDUP_LOG2(priv->num_frags * sizeof(struct mbuf *));
 
-	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
-		  "num_frags:%d):\n", eff_mtu, priv->num_frags);
-	for (i = 0; i < priv->num_frags; i++) {
-		en_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d\n", i,
-				priv->frag_info[i].frag_size,
-				priv->frag_info[i].frag_prefix_size);
-	}
+	en_dbg(DRV, priv, "Effective RX MTU: %d bytes\n", eff_mtu);
 }
 
-
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
 			   struct mlx4_en_rx_ring **pring,
 			   u32 size, int node)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rx_ring *ring;
-	int err = -ENOMEM;
+	int err;
 	int tmp;
+	uint32_t x;
 
         ring = kzalloc(sizeof(struct mlx4_en_rx_ring), GFP_KERNEL);
         if (!ring) {
                 en_err(priv, "Failed to allocate RX ring structure\n");
                 return -ENOMEM;
         }
- 
+
+	/* Create DMA descriptor TAG */
+	if ((err = -bus_dma_tag_create(
+	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
+	    1,				/* any alignment */
+	    0,				/* no boundary */
+	    BUS_SPACE_MAXADDR,		/* lowaddr */
+	    BUS_SPACE_MAXADDR,		/* highaddr */
+	    NULL, NULL,			/* filter, filterarg */
+	    MJUM16BYTES,		/* maxsize */
+	    1,				/* nsegments */
+	    MJUM16BYTES,		/* maxsegsize */
+	    0,				/* flags */
+	    NULL, NULL,			/* lockfunc, lockfuncarg */
+	    &ring->dma_tag))) {
+		en_err(priv, "Failed to create DMA tag\n");
+		goto err_ring;
+	}
+
 	ring->prod = 0;
 	ring->cons = 0;
 	ring->size = size;
 	ring->size_mask = size - 1;
-	ring->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
-	                                          DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
+	ring->stride = roundup_pow_of_two(
+	    sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
 	ring->log_stride = ffs(ring->stride) - 1;
 	ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
 
-	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
-	                                        sizeof(struct mbuf *));
+	tmp = size * sizeof(struct mlx4_en_rx_mbuf);
 
-        ring->rx_info = kmalloc(tmp, GFP_KERNEL);
-        if (!ring->rx_info) {
+        ring->mbuf = kzalloc(tmp, GFP_KERNEL);
+        if (ring->mbuf == NULL) {
                 err = -ENOMEM;
-                goto err_ring;
+                goto err_dma_tag;
         }
 
-	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
-		 ring->rx_info, tmp);
+	err = -bus_dmamap_create(ring->dma_tag, 0, &ring->spare.dma_map);
+	if (err != 0)
+		goto err_info;
+
+	for (x = 0; x != size; x++) {
+		err = -bus_dmamap_create(ring->dma_tag, 0,
+		    &ring->mbuf[x].dma_map);
+		if (err != 0) {
+			while (x--)
+				bus_dmamap_destroy(ring->dma_tag,
+				    ring->mbuf[x].dma_map);
+			goto err_info;
+		}
+	}
+	en_dbg(DRV, priv, "Allocated MBUF ring at addr:%p size:%d\n",
+		 ring->mbuf, tmp);
 
 	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
 				 ring->buf_size, 2 * PAGE_SIZE);
 	if (err)
-		goto err_info;
+		goto err_dma_map;
 
 	err = mlx4_en_map_buffer(&ring->wqres.buf);
 	if (err) {
@@ -320,23 +356,29 @@ int mlx4_en_create_rx_ring(struct mlx4_e
 
 err_hwq:
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+err_dma_map:
+	for (x = 0; x != size; x++) {
+		bus_dmamap_destroy(ring->dma_tag,
+		    ring->mbuf[x].dma_map);
+	}
+	bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
 err_info:
-	vfree(ring->rx_info);
+	vfree(ring->mbuf);
+err_dma_tag:
+	bus_dma_tag_destroy(ring->dma_tag);
 err_ring:
 	kfree(ring);
-
-	return err;
+	return (err);
 }
 
-
 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_rx_ring *ring;
 	int i;
 	int ring_ind;
 	int err;
-	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
-	                                        DS_SIZE * priv->num_frags);
+	int stride = roundup_pow_of_two(
+	    sizeof(struct mlx4_en_rx_desc) + DS_SIZE);
 
 	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
 		ring = priv->rx_ring[ring_ind];
@@ -412,10 +454,22 @@ void mlx4_en_destroy_rx_ring(struct mlx4
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rx_ring *ring = *pring;
+	uint32_t x;
 
 	mlx4_en_unmap_buffer(&ring->wqres.buf);
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
-	vfree(ring->rx_info);
+	for (x = 0; x != size; x++)
+		bus_dmamap_destroy(ring->dma_tag, ring->mbuf[x].dma_map);
+	/* free spare mbuf, if any */
+	if (ring->spare.mbuf != NULL) {
+		bus_dmamap_sync(ring->dma_tag, ring->spare.dma_map,
+		    BUS_DMASYNC_POSTREAD);
+		bus_dmamap_unload(ring->dma_tag, ring->spare.dma_map);
+		m_freem(ring->spare.mbuf);
+	}
+	bus_dmamap_destroy(ring->dma_tag, ring->spare.dma_map);
+	vfree(ring->mbuf);
+	bus_dma_tag_destroy(ring->dma_tag);
 	kfree(ring);
 	*pring = NULL;
 #ifdef CONFIG_RFS_ACCEL
@@ -423,7 +477,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4
 #endif
 }
 
-
 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
 				struct mlx4_en_rx_ring *ring)
 {
@@ -472,69 +525,27 @@ static inline int invalid_cqe(struct mlx
 	return 0;
 }
 
-
-/* Unmap a completed descriptor and free unused pages */
-static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
-				    struct mlx4_en_rx_desc *rx_desc,
-				    struct mbuf **mb_list,
-				    int length)
-{
-	struct mlx4_en_dev *mdev = priv->mdev;
-	struct mlx4_en_frag_info *frag_info;
-	dma_addr_t dma;
-	struct mbuf *mb;
-	int nr;
-
-	mb = mb_list[0];
-	mb->m_pkthdr.len = length;
-	/* Collect used fragments while replacing them in the HW descirptors */
-	for (nr = 0; nr < priv->num_frags; nr++) {
-		frag_info = &priv->frag_info[nr];
-		if (length <= frag_info->frag_prefix_size)
-			break;
-		if (nr)
-			mb->m_next = mb_list[nr];
-		mb = mb_list[nr];
-		mb->m_len = frag_info->frag_size;
-		dma = be64_to_cpu(rx_desc->data[nr].addr);
-
-                /* Allocate a replacement page */
-                if (mlx4_en_alloc_buf(priv, rx_desc, mb_list, nr))
-                        goto fail;
-
-		/* Unmap buffer */
-		pci_unmap_single(mdev->pdev, dma, frag_info->frag_size,
-				 PCI_DMA_FROMDEVICE);
-	}
-	/* Adjust size of last fragment to match actual length */
-	mb->m_len = length - priv->frag_info[nr - 1].frag_prefix_size;
-	mb->m_next = NULL;
-	return 0;
-
-fail:
-        /* Drop all accumulated fragments (which have already been replaced in
-         * the descriptor) of this packet; remaining fragments are reused... */
-        while (nr > 0) {
-                nr--;
-                m_free(mb_list[nr]);
-        }
-        return -ENOMEM;
-
-}
-
-static struct mbuf *mlx4_en_rx_mb(struct mlx4_en_priv *priv,
-				  struct mlx4_en_rx_desc *rx_desc,
-				  struct mbuf **mb_list,
-				  unsigned int length)
+static struct mbuf *
+mlx4_en_rx_mb(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
+    struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_mbuf *mb_list,
+    int length)
 {
 	struct mbuf *mb;
 
-	mb = mb_list[0];
-	/* Move relevant fragments to mb */
-	if (unlikely(mlx4_en_complete_rx_desc(priv, rx_desc, mb_list, length)))
-		return NULL;
+	/* get mbuf */
+	mb = mb_list->mbuf;
 
-	return mb;
+	/* collect used fragment while atomically replacing it */
+	if (mlx4_en_alloc_buf(ring, &rx_desc->data[0].addr, mb_list))
+		return (NULL);
+
+	/* range check hardware computed value */
+	if (unlikely(length > mb->m_len))
+		length = mb->m_len;
+
+	/* update total packet length in packet header */
+	mb->m_len = mb->m_pkthdr.len = length;
+	return (mb);
 }
 
 /* For cpu arch with cache line of 64B the performance is better when cqe size==64B
@@ -548,7 +559,7 @@ int mlx4_en_process_rx_cq(struct net_dev
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_cqe *cqe;
 	struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
-	struct mbuf **mb_list;
+	struct mlx4_en_rx_mbuf *mb_list;
 	struct mlx4_en_rx_desc *rx_desc;
 	struct mbuf *mb;
 	struct mlx4_cq *mcq = &cq->mcq;
@@ -576,7 +587,7 @@ int mlx4_en_process_rx_cq(struct net_dev
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
 		    cons_index & size)) {
-		mb_list = ring->rx_info + (index << priv->log_rx_info);
+		mb_list = ring->mbuf + index;
 		rx_desc = (struct mlx4_en_rx_desc *)
 		    (ring->buf + (index << ring->log_stride));
 
@@ -593,8 +604,9 @@ int mlx4_en_process_rx_cq(struct net_dev
 		 */
 		length = be32_to_cpu(cqe->byte_cnt);
 		length -= ring->fcs_del;
-		mb = mlx4_en_rx_mb(priv, rx_desc, mb_list, length);
-		if (!mb) {
+
+		mb = mlx4_en_rx_mb(priv, ring, rx_desc, mb_list, length);
+		if (unlikely(!mb)) {
 			ring->errors++;
 			goto next;
 		}
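
A condensed sketch of the spare-buffer fallback that mlx4_en_alloc_buf()
implements above: the structure and function names here are hypothetical,
the spare is assumed to be already loaded and synced, and the Linux-style
byte-order helpers follow this driver's ofed idiom (same includes as the
sketch near the top).

	struct rx_slot {
		bus_dmamap_t	map;
		struct mbuf	*mbuf;
		__be64		paddr_be;	/* bus address, cached at load time */
	};

	static int
	rx_refill_or_recycle(bus_dma_tag_t tag, struct rx_slot *slot,
	    struct rx_slot *spare, __be64 *paddr)
	{
		bus_dma_segment_t seg;
		bus_dmamap_t tmp;
		struct mbuf *mb;
		int nsegs;

		mb = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM16BYTES);
		if (mb != NULL) {
			mb->m_len = MJUM16BYTES;
			if (bus_dmamap_load_mbuf_sg(tag, slot->map, mb, &seg,
			    &nsegs, BUS_DMA_NOWAIT) == 0) {
				bus_dmamap_sync(tag, slot->map,
				    BUS_DMASYNC_PREREAD);
				slot->mbuf = mb;
				*paddr = cpu_to_be64(seg.ds_addr);
				return (0);
			}
			m_freem(mb);
		}
		if (spare->mbuf == NULL)
			return (-ENOMEM);	/* no spare to fall back on */

		/*
		 * Swap DMA maps with the pre-loaded spare and consume its
		 * mbuf, so the descriptor is never left without a buffer.
		 */
		tmp = slot->map;
		slot->map = spare->map;
		spare->map = tmp;
		slot->mbuf = spare->mbuf;
		spare->mbuf = NULL;
		*paddr = spare->paddr_be;
		return (0);
	}

Recycling the pre-loaded spare means a receive descriptor can always be
reposted, even under memory pressure; the cost is one extra map and one
jumbo mbuf held per ring.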

Modified: head/sys/ofed/drivers/net/mlx4/en_tx.c
==============================================================================
--- head/sys/ofed/drivers/net/mlx4/en_tx.c	Thu Dec  3 14:38:55 2015	(r291698)
+++ head/sys/ofed/drivers/net/mlx4/en_tx.c	Thu Dec  3 14:56:17 2015	(r291699)
@@ -67,6 +67,7 @@ int mlx4_en_create_tx_ring(struct mlx4_e
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_ring *ring;
+	uint32_t x;
 	int tmp;
 	int err;
 
@@ -79,11 +80,26 @@ int mlx4_en_create_tx_ring(struct mlx4_e
 		}
 	}
 
+	/* Create DMA descriptor TAG */
+	if ((err = -bus_dma_tag_create(
+	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
+	    1,					/* any alignment */
+	    0,					/* no boundary */
+	    BUS_SPACE_MAXADDR,			/* lowaddr */
+	    BUS_SPACE_MAXADDR,			/* highaddr */
+	    NULL, NULL,				/* filter, filterarg */
+	    MLX4_EN_TX_MAX_PAYLOAD_SIZE,	/* maxsize */
+	    MLX4_EN_TX_MAX_MBUF_FRAGS,		/* nsegments */
+	    MLX4_EN_TX_MAX_MBUF_SIZE,		/* maxsegsize */
+	    0,					/* flags */
+	    NULL, NULL,				/* lockfunc, lockfuncarg */
+	    &ring->dma_tag)))
+		goto done;
+
 	ring->size = size;
 	ring->size_mask = size - 1;
 	ring->stride = stride;
-	ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
-	ring->inline_thold = min(inline_thold, MAX_INLINE);
+	ring->inline_thold = MAX(MIN_PKT_LEN, MIN(inline_thold, MAX_INLINE));
 	mtx_init(&ring->tx_lock.m, "mlx4 tx", NULL, MTX_DEF);
 	mtx_init(&ring->comp_lock.m, "mlx4 comp", NULL, MTX_DEF);
 
@@ -92,30 +108,36 @@ int mlx4_en_create_tx_ring(struct mlx4_e
 		M_WAITOK, &ring->tx_lock.m);
 	if (ring->br == NULL) {
 		en_err(priv, "Failed allocating tx_info ring\n");
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto err_free_dma_tag;
 	}
 
 	tmp = size * sizeof(struct mlx4_en_tx_info);
-	ring->tx_info = vmalloc_node(tmp, node);
+	ring->tx_info = kzalloc_node(tmp, GFP_KERNEL, node);
 	if (!ring->tx_info) {
-		ring->tx_info = vmalloc(tmp);
+		ring->tx_info = kzalloc(tmp, GFP_KERNEL);
 		if (!ring->tx_info) {
 			err = -ENOMEM;
 			goto err_ring;
 		}
 	}
 
-	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
-		 ring->tx_info, tmp);
-
-	ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
-	if (!ring->bounce_buf) {
-		ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
-		if (!ring->bounce_buf) {
-			err = -ENOMEM;
+	/* Create DMA descriptor MAPs */
+	for (x = 0; x != size; x++) {
+		err = -bus_dmamap_create(ring->dma_tag, 0,
+		    &ring->tx_info[x].dma_map);
+		if (err != 0) {
+			while (x--) {
+				bus_dmamap_destroy(ring->dma_tag,
+				    ring->tx_info[x].dma_map);
+			}
 			goto err_info;
 		}
 	}
+
+	en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
+		 ring->tx_info, tmp);
+
 	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
 
 	/* Allocate HW buffers on provided NUMA node */
@@ -123,7 +145,7 @@ int mlx4_en_create_tx_ring(struct mlx4_e
 				 2 * PAGE_SIZE);
 	if (err) {
 		en_err(priv, "Failed allocating hwq resources\n");
-		goto err_bounce;
+		goto err_dma_map;
 	}
 
 	err = mlx4_en_map_buffer(&ring->wqres.buf);
@@ -173,12 +195,16 @@ err_map:
 	mlx4_en_unmap_buffer(&ring->wqres.buf);
 err_hwq_res:
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
-err_bounce:
-	kfree(ring->bounce_buf);
+err_dma_map:
+	for (x = 0; x != size; x++)
+		bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
 err_info:
 	vfree(ring->tx_info);
 err_ring:
 	buf_ring_free(ring->br, M_DEVBUF);
+err_free_dma_tag:
+	bus_dma_tag_destroy(ring->dma_tag);
+done:
 	kfree(ring);
 	return err;
 }
@@ -188,6 +214,7 @@ void mlx4_en_destroy_tx_ring(struct mlx4
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_ring *ring = *pring;
+	uint32_t x;
 	en_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn);
 
 	buf_ring_free(ring->br, M_DEVBUF);
@@ -198,10 +225,12 @@ void mlx4_en_destroy_tx_ring(struct mlx4
 	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
 	mlx4_en_unmap_buffer(&ring->wqres.buf);
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
-	kfree(ring->bounce_buf);
+	for (x = 0; x != ring->size; x++)
+		bus_dmamap_destroy(ring->dma_tag, ring->tx_info[x].dma_map);
 	vfree(ring->tx_info);
 	mtx_destroy(&ring->tx_lock.m);
 	mtx_destroy(&ring->comp_lock.m);
+	bus_dma_tag_destroy(ring->dma_tag);
 	kfree(ring);
 	*pring = NULL;
 }
@@ -219,7 +248,6 @@ int mlx4_en_activate_tx_ring(struct mlx4
 	ring->last_nr_txbb = 1;
 	ring->poll_cnt = 0;
 	ring->blocked = 0;
-	memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info));
 	memset(ring->buf, 0, ring->buf_size);
 
 	ring->qp_state = MLX4_QP_STATE_RST;
@@ -244,99 +272,63 @@ void mlx4_en_deactivate_tx_ring(struct m
 		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
 }
 
-static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
-		       struct mlx4_en_tx_ring *ring,
-		       int index, u8 owner)
+static volatile struct mlx4_wqe_data_seg *
+mlx4_en_store_inline_lso_data(volatile struct mlx4_wqe_data_seg *dseg,
+    struct mbuf *mb, int len, __be32 owner_bit)
 {
-	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
-	struct mlx4_en_tx_desc *tx_desc = (struct mlx4_en_tx_desc *)
-	    (ring->buf + index * TXBB_SIZE);
-	void *end = ring->buf + ring->buf_size;
-	__be32 *ptr = (__be32 *)tx_desc;
-	__be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT));
-	int i;
-
-	/* Optimize the common case when there are no wraparounds */
-	if (likely((void *)tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end))
-		/* Stamp the freed descriptor */
-		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
-			*ptr = stamp;
-			ptr += STAMP_DWORDS;
-		}
-	else
-		/* Stamp the freed descriptor */
-		for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
-			*ptr = stamp;
-			ptr += STAMP_DWORDS;
-			if ((void *)ptr >= end) {
-				ptr = (__be32 *)ring->buf;
-				stamp ^= cpu_to_be32(0x80000000);
-			}
-		}
+	uint8_t *inl = __DEVOLATILE(uint8_t *, dseg);
+
+	/* copy data into place */
+	m_copydata(mb, 0, len, inl + 4);
+	dseg += DIV_ROUND_UP(4 + len, DS_SIZE_ALIGNMENT);
+	return (dseg);
 }
 
-static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
-				struct mlx4_en_tx_ring *ring,
-				int index, u8 owner, u64 timestamp)
+static void
+mlx4_en_store_inline_lso_header(volatile struct mlx4_wqe_data_seg *dseg,
+    int len, __be32 owner_bit)
+{
+}
+
+static void
+mlx4_en_stamp_wqe(struct mlx4_en_priv *priv,
+    struct mlx4_en_tx_ring *ring, u32 index, u8 owner)
 {
-	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
 	struct mlx4_en_tx_desc *tx_desc = (struct mlx4_en_tx_desc *)
-	    (ring->buf + index * TXBB_SIZE);
-	struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
-        struct mbuf *mb = tx_info->mb;
-	void *end = ring->buf + ring->buf_size;
-	int frags = tx_info->nr_segs;;
-	int i;
-
-	/* Optimize the common case when there are no wraparounds */
-	if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
-		if (!tx_info->inl) {
-			if (tx_info->linear) {
-				dma_unmap_single(priv->ddev,
-					(dma_addr_t) be64_to_cpu(data->addr),
-					 be32_to_cpu(data->byte_count),
-					 PCI_DMA_TODEVICE);
-				++data;
-			}
+	    (ring->buf + (index * TXBB_SIZE));
+	volatile __be32 *ptr = (__be32 *)tx_desc;
+	const __be32 stamp = cpu_to_be32(STAMP_VAL |
+	    ((u32)owner << STAMP_SHIFT));
+	u32 i;
+
+	/* Stamp the freed descriptor */
+	for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) {
+		*ptr = stamp;
+		ptr += STAMP_DWORDS;
+	}
+}
 
-			for (i = 0; i < frags; i++) {
-                                pci_unmap_single(mdev->pdev,
-                                                (dma_addr_t) be64_to_cpu(data[i].addr),
-                                                data[i].byte_count, PCI_DMA_TODEVICE);
-			}
-		}
-	} else {
-		if (!tx_info->inl) {
-			if ((void *) data >= end) {
-				data = (struct mlx4_wqe_data_seg *)
-				    (ring->buf + ((void *)data - end));
-			}
+static u32
+mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
+    struct mlx4_en_tx_ring *ring, u32 index)
+{
+	struct mlx4_en_tx_info *tx_info;
+	struct mbuf *mb;
 
-			if (tx_info->linear) {
-				dma_unmap_single(priv->ddev,
-					(dma_addr_t) be64_to_cpu(data->addr),
-					 be32_to_cpu(data->byte_count),
-					 PCI_DMA_TODEVICE);
-				++data;
-			}
+	tx_info = &ring->tx_info[index];
+	mb = tx_info->mb;
+
+	if (mb == NULL)
+		goto done;
+
+	bus_dmamap_sync(ring->dma_tag, tx_info->dma_map,
+	    BUS_DMASYNC_POSTWRITE);
+	bus_dmamap_unload(ring->dma_tag, tx_info->dma_map);
 
-			for (i = 0; i < frags; i++) {
-				/* Check for wraparound before unmapping */
-				if ((void *) data >= end)
-					data = (struct mlx4_wqe_data_seg *)ring->buf;
-                                pci_unmap_single(mdev->pdev,
-                                                (dma_addr_t) be64_to_cpu(data->addr),
-                                                data->byte_count, PCI_DMA_TODEVICE);
-				++data;
-			}
-		}
-	}
-	/* Send a copy of the frame to the BPF listener */
-        if (priv->dev && priv->dev->if_bpf)
-                ETHER_BPF_MTAP(priv->dev, mb);
         m_freem(mb);
-	return tx_info->nr_txbb;
+done:
+	return (tx_info->nr_txbb);
 }
 
 int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
@@ -356,8 +348,7 @@ int mlx4_en_free_tx_buf(struct net_devic
 
 	while (ring->cons != ring->prod) {
 		ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring,
-						ring->cons & ring->size_mask,
-						!!(ring->cons & ring->size), 0);
+		    ring->cons & ring->size_mask);
 		ring->cons += ring->last_nr_txbb;
 		cnt++;
 	}
@@ -368,6 +359,14 @@ int mlx4_en_free_tx_buf(struct net_devic
 	return cnt;
 }
 
+static bool
+mlx4_en_tx_ring_is_full(struct mlx4_en_tx_ring *ring)
+{
+	int wqs;
+	wqs = ring->size - (ring->prod - ring->cons);
+	return (wqs < (HEADROOM + (2 * MLX4_EN_TX_WQE_MAX_WQEBBS)));
+}
+
 static int mlx4_en_process_tx_cq(struct net_device *dev,
 				 struct mlx4_en_cq *cq)
 {
@@ -383,12 +382,7 @@ static int mlx4_en_process_tx_cq(struct 
 	int size = cq->size;
 	u32 size_mask = ring->size_mask;
 	struct mlx4_cqe *buf = cq->buf;
-	u32 packets = 0;
-	u32 bytes = 0;
 	int factor = priv->cqe_factor;
-	u64 timestamp = 0;
-	int done = 0;
-
 
 	if (!priv->port_up)
 		return 0;
@@ -423,16 +417,12 @@ static int mlx4_en_process_tx_cq(struct 
 			ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
 			/* free next descriptor */
 			ring->last_nr_txbb = mlx4_en_free_tx_desc(
-					priv, ring, ring_index,
-					!!((ring->cons + txbbs_skipped) &
-					ring->size), timestamp);
+			    priv, ring, ring_index);
 			mlx4_en_stamp_wqe(priv, ring, stamp_index,
 					  !!((ring->cons + txbbs_stamp) &
 						ring->size));
 			stamp_index = ring_index;
 			txbbs_stamp = txbbs_skipped;
-			packets++;
-			bytes += ring->tx_info[ring_index].nr_bytes;
 		} while (ring_index != new_index);
 
 		++cons_index;
@@ -451,15 +441,14 @@ static int mlx4_en_process_tx_cq(struct 
 	ring->cons += txbbs_skipped;
 
 	/* Wakeup Tx queue if it was stopped and ring is not full */
-	if (unlikely(ring->blocked) &&
-	    (ring->prod - ring->cons) <= ring->full_size) {
+	if (unlikely(ring->blocked) && !mlx4_en_tx_ring_is_full(ring)) {
 		ring->blocked = 0;
 		if (atomic_fetchadd_int(&priv->blocked, -1) == 1)
 			atomic_clear_int(&dev->if_drv_flags ,IFF_DRV_OACTIVE);
 		ring->wake_queue++;
 		priv->port_stats.wake_queue++;
 	}
-	return done;
+	return (0);
 }
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq)
@@ -500,34 +489,6 @@ void mlx4_en_poll_tx_cq(unsigned long da
 	spin_unlock(&ring->comp_lock);
 }
 
-static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv,
-						      struct mlx4_en_tx_ring *ring,
-						      u32 index,
-						      unsigned int desc_size)
-{
-	u32 copy = (ring->size - index) * TXBB_SIZE;
-	int i;
-
-	for (i = desc_size - copy - 4; i >= 0; i -= 4) {
-		if ((i & (TXBB_SIZE - 1)) == 0)
-			wmb();
-
-		*((u32 *) (ring->buf + i)) =
-			*((u32 *) (ring->bounce_buf + copy + i));
-	}
-
-	for (i = copy - 4; i >= 4 ; i -= 4) {
-		if ((i & (TXBB_SIZE - 1)) == 0)
-			wmb();
-
-		*((u32 *) (ring->buf + index * TXBB_SIZE + i)) =
-			*((u32 *) (ring->bounce_buf + i));
-	}
-
-	/* Return real descriptor location */
-	return (struct mlx4_en_tx_desc *)(ring->buf + index * TXBB_SIZE);
-}
-
 static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind)
 {
 	struct mlx4_en_cq *cq = priv->tx_cq[tx_ind];
@@ -546,30 +507,22 @@ static inline void mlx4_en_xmit_poll(str

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
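
The remainder of the en_tx.c changes is truncated above. On the transmit
side, whole mbuf chains are loaded through the ring's tag (nsegments =
MLX4_EN_TX_MAX_MBUF_FRAGS). A common FreeBSD pattern for that load, shown
here as an assumption rather than the exact committed code, retries with
m_defrag() when the chain exceeds the tag's segment limit:

	/* Assumes the same includes as the first sketch. */
	static int
	tx_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **mbp,
	    bus_dma_segment_t *segs, int *nsegs)
	{
		struct mbuf *mb = *mbp;
		int err;

		err = bus_dmamap_load_mbuf_sg(tag, map, mb, segs, nsegs,
		    BUS_DMA_NOWAIT);
		if (err == EFBIG) {
			/* Too many segments: linearize the chain, retry. */
			mb = m_defrag(mb, M_NOWAIT);
			if (mb == NULL)
				return (ENOMEM);	/* caller frees *mbp */
			*mbp = mb;
			err = bus_dmamap_load_mbuf_sg(tag, map, mb, segs,
			    nsegs, BUS_DMA_NOWAIT);
		}
		if (err == 0)
			bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
		return (err);
	}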