Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 26 Aug 2019 15:13:46 +0200
From:      Hans Petter Selasky <hps@selasky.org>
To:        Jason Bacon <bacon4000@gmail.com>, freebsd-infiniband@freebsd.org
Subject:   Re: Kernel modules
Message-ID:  <bb497012-81c9-f05a-6d1a-6061fa731348@selasky.org>
In-Reply-To: <691d5884-6947-8044-ecf6-05ab97e9faca@gmail.com>
References:  <0eba9ec9-692f-7677-2b10-4e67a232821c@gmail.com> <f3f94452-155f-79f4-72d8-bf65760ae5b0@selasky.org> <598a58f0-89b8-d00d-5ed7-74dd7005950f@gmail.com> <73ce0738-4d63-2f25-2ff6-00f0092de136@selasky.org> <2090dd24-db43-b689-4289-f50bd70090ea@gmail.com> <6673df26-8bba-ebd3-b2c5-d7e9c97db557@gmail.com> <d82f3a60-6ad4-dba8-a15b-355a536a9a83@gmail.com> <bd42597e-2981-4667-468e-b008b9be290b@selasky.org> <2f4d9a14-4ff6-0d34-06f0-bbb4ac76c6bd@gmail.com> <5166ec29-876b-0bd3-8a84-8a222647e87a@gmail.com> <b6e6f8931f59fb2ecf985478ea4d77b7@postgresql.org> <236a3839-e880-ab17-146a-4521d1894813@gmail.com> <ea3b8d21-3ce0-17ea-1e04-a84ef7c81baa@gmail.com> <691d5884-6947-8044-ecf6-05ab97e9faca@gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------9EB69E496ED5EEE314553BB3
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 8bit

On 2019-04-18 14:52, Jason Bacon wrote:
> 
> My NFS over IB has been generally working well, but it's going down 
> under very heavy load.  It's getting consistently triggered by 256 
> I/O-intensive processes across about a dozen compute nodes.
> 
> The server remains up and responsive, but ib0 is running out of buffer 
> space and going down.
> 

Hi Jason,

Mellanox found a bug in ipoib which can lead to symptoms similar to 
those you are seeing. Can you try the attached patch?

Thank you!

--HPS

--------------9EB69E496ED5EEE314553BB3
Content-Type: text/x-patch;
 name="0001-ipoib-Make-sure-the-transmit-loop-doesn-t-get-starve.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename*0="0001-ipoib-Make-sure-the-transmit-loop-doesn-t-get-starve.pa";
 filename*1="tch"

When the software send queue gets filled up, callbacks to
if_transmit will stop. Make sure the transmit callback
routine checks the send queue and outputs any remaining
mbufs. Otherwise, the remaining mbufs may simply sit in the
output queue, blocking the transmit path.

---
 sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h |  4 +++-
 .../drivers/infiniband/ulp/ipoib/ipoib_cm.c   |  6 +++--
 .../drivers/infiniband/ulp/ipoib/ipoib_ib.c   | 11 ++++++----
 .../drivers/infiniband/ulp/ipoib/ipoib_main.c | 22 +++++++++++++------
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
index 70cb4a38..8f27a0dc 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -536,7 +536,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv);
 
 int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req, int max);
 void ipoib_dma_unmap_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req);
-int ipoib_poll_tx(struct ipoib_dev_priv *priv);
+int ipoib_poll_tx(struct ipoib_dev_priv *priv, bool do_start);
 
 void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req);
 void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length);
@@ -764,4 +764,6 @@ extern int ipoib_debug_level;
 
 #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
 
+void ipoib_start_locked(struct ifnet *, struct ipoib_dev_priv *);
+
 #endif /* _IPOIB_H */
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 119503d7..2f0ea2fc 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -618,8 +618,10 @@ void ipoib_cm_send(struct ipoib_dev_priv *priv, struct mbuf *mb, struct ipoib_cm
 	struct ipoib_cm_tx_buf *tx_req;
 	struct ifnet *dev = priv->dev;
 
-	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
-		while (ipoib_poll_tx(priv)); /* nothing */
+	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) {
+		while (ipoib_poll_tx(priv, false))
+			;	/* nothing */
+	}
 
 	m_adj(mb, sizeof(struct ipoib_pseudoheader));
 	if (unlikely(mb->m_pkthdr.len > tx->mtu)) {
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index e38b450e..9cb2e9b1 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -366,7 +366,7 @@ static void ipoib_ib_handle_tx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc)
 }
 
 int
-ipoib_poll_tx(struct ipoib_dev_priv *priv)
+ipoib_poll_tx(struct ipoib_dev_priv *priv, bool do_start)
 {
 	int n, i;
 
@@ -379,6 +379,9 @@ ipoib_poll_tx(struct ipoib_dev_priv *priv)
 			ipoib_ib_handle_tx_wc(priv, wc);
 	}
 
+	if (do_start && n != 0)
+		ipoib_start_locked(priv->dev, priv);
+
 	return n == MAX_SEND_CQE;
 }
 
@@ -425,7 +428,7 @@ static void drain_tx_cq(struct ipoib_dev_priv *priv)
 	struct ifnet *dev = priv->dev;
 
 	spin_lock(&priv->lock);
-	while (ipoib_poll_tx(priv))
+	while (ipoib_poll_tx(priv, true))
 		; /* nothing */
 
 	if (dev->if_drv_flags & IFF_DRV_OACTIVE)
@@ -482,7 +485,7 @@ ipoib_send(struct ipoib_dev_priv *priv, struct mbuf *mb,
 	void *phead;
 
 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
-		while (ipoib_poll_tx(priv))
+		while (ipoib_poll_tx(priv, false))
 			; /* nothing */
 
 	m_adj(mb, sizeof (struct ipoib_pseudoheader));
@@ -762,7 +765,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv)
 	spin_unlock(&priv->drain_lock);
 
 	spin_lock(&priv->lock);
-	while (ipoib_poll_tx(priv))
+	while (ipoib_poll_tx(priv, true))
 		; /* nothing */
 
 	spin_unlock(&priv->lock);
diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ac3108ed..4aad5bd4 100644
--- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -772,17 +772,13 @@ ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb)
 	return 0;
 }
 
-
-static void
-_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv)
+void
+ipoib_start_locked(struct ifnet *dev, struct ipoib_dev_priv *priv)
 {
 	struct mbuf *mb;
 
-	if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
-	    IFF_DRV_RUNNING)
-		return;
+	assert_spin_locked(&priv->lock);
 
-	spin_lock(&priv->lock);
 	while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) &&
 	    (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
 		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
@@ -791,6 +787,18 @@ _ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv)
 		IPOIB_MTAP(dev, mb);
 		ipoib_send_one(priv, mb);
 	}
+}
+
+static void
+_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv)
+{
+
+	if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
+	    IFF_DRV_RUNNING)
+		return;
+
+	spin_lock(&priv->lock);
+	ipoib_start_locked(dev, priv);
 	spin_unlock(&priv->lock);
 }
 
-- 
2.21.0


--------------9EB69E496ED5EEE314553BB3--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?bb497012-81c9-f05a-6d1a-6061fa731348>