Date: Mon, 26 Aug 2019 15:13:46 +0200 From: Hans Petter Selasky <hps@selasky.org> To: Jason Bacon <bacon4000@gmail.com>, freebsd-infiniband@freebsd.org Subject: Re: Kernel modules Message-ID: <bb497012-81c9-f05a-6d1a-6061fa731348@selasky.org> In-Reply-To: <691d5884-6947-8044-ecf6-05ab97e9faca@gmail.com> References: <0eba9ec9-692f-7677-2b10-4e67a232821c@gmail.com> <f3f94452-155f-79f4-72d8-bf65760ae5b0@selasky.org> <598a58f0-89b8-d00d-5ed7-74dd7005950f@gmail.com> <73ce0738-4d63-2f25-2ff6-00f0092de136@selasky.org> <2090dd24-db43-b689-4289-f50bd70090ea@gmail.com> <6673df26-8bba-ebd3-b2c5-d7e9c97db557@gmail.com> <d82f3a60-6ad4-dba8-a15b-355a536a9a83@gmail.com> <bd42597e-2981-4667-468e-b008b9be290b@selasky.org> <2f4d9a14-4ff6-0d34-06f0-bbb4ac76c6bd@gmail.com> <5166ec29-876b-0bd3-8a84-8a222647e87a@gmail.com> <b6e6f8931f59fb2ecf985478ea4d77b7@postgresql.org> <236a3839-e880-ab17-146a-4521d1894813@gmail.com> <ea3b8d21-3ce0-17ea-1e04-a84ef7c81baa@gmail.com> <691d5884-6947-8044-ecf6-05ab97e9faca@gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format. --------------9EB69E496ED5EEE314553BB3 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 8bit On 2019-04-18 14:52, Jason Bacon wrote: > > My NFS over IB has been generally working well, but it's going down > under very heavy load. It's getting consistently triggered by 256 > I/O-intensive processes across about a dozen compute nodes. > > The server remains up and responsive, but ib0 is running out of buffer > space and going down. > Hi Jason, Mellanox found a bug in ipoib which can lead to symptoms similar to the ones you see. Can you try the attached patch? Thank you! --HPS --------------9EB69E496ED5EEE314553BB3 Content-Type: text/x-patch; name="0001-ipoib-Make-sure-the-transmit-loop-doesn-t-get-starve.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename*0="0001-ipoib-Make-sure-the-transmit-loop-doesn-t-get-starve.pa"; filename*1="tch" When the software send queue gets filled up callbacks to if_transmit will stop. Make sure the transmit callback routine checks the send queue and outputs any remaining mbufs. Else the remaining mbufs may simply sit in the output queue blocking the transmit path. 
--- sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h | 4 +++- .../drivers/infiniband/ulp/ipoib/ipoib_cm.c | 6 +++-- .../drivers/infiniband/ulp/ipoib/ipoib_ib.c | 11 ++++++---- .../drivers/infiniband/ulp/ipoib/ipoib_main.c | 22 +++++++++++++------ 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h index 70cb4a38..8f27a0dc 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib.h @@ -536,7 +536,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv); int ipoib_dma_map_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req, int max); void ipoib_dma_unmap_tx(struct ib_device *ca, struct ipoib_tx_buf *tx_req); -int ipoib_poll_tx(struct ipoib_dev_priv *priv); +int ipoib_poll_tx(struct ipoib_dev_priv *priv, bool do_start); void ipoib_dma_unmap_rx(struct ipoib_dev_priv *priv, struct ipoib_rx_buf *rx_req); void ipoib_dma_mb(struct ipoib_dev_priv *priv, struct mbuf *mb, unsigned int length); @@ -764,4 +764,6 @@ extern int ipoib_debug_level; #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) +void ipoib_start_locked(struct ifnet *, struct ipoib_dev_priv *); + #endif /* _IPOIB_H */ diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 119503d7..2f0ea2fc 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -618,8 +618,10 @@ void ipoib_cm_send(struct ipoib_dev_priv *priv, struct mbuf *mb, struct ipoib_cm struct ipoib_cm_tx_buf *tx_req; struct ifnet *dev = priv->dev; - if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) - while (ipoib_poll_tx(priv)); /* nothing */ + if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) { + while (ipoib_poll_tx(priv, false)) + ; /* nothing */ + } m_adj(mb, sizeof(struct ipoib_pseudoheader)); if (unlikely(mb->m_pkthdr.len > tx->mtu)) { diff --git 
a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c index e38b450e..9cb2e9b1 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -366,7 +366,7 @@ static void ipoib_ib_handle_tx_wc(struct ipoib_dev_priv *priv, struct ib_wc *wc) } int -ipoib_poll_tx(struct ipoib_dev_priv *priv) +ipoib_poll_tx(struct ipoib_dev_priv *priv, bool do_start) { int n, i; @@ -379,6 +379,9 @@ ipoib_poll_tx(struct ipoib_dev_priv *priv) ipoib_ib_handle_tx_wc(priv, wc); } + if (do_start && n != 0) + ipoib_start_locked(priv->dev, priv); + return n == MAX_SEND_CQE; } @@ -425,7 +428,7 @@ static void drain_tx_cq(struct ipoib_dev_priv *priv) struct ifnet *dev = priv->dev; spin_lock(&priv->lock); - while (ipoib_poll_tx(priv)) + while (ipoib_poll_tx(priv, true)) ; /* nothing */ if (dev->if_drv_flags & IFF_DRV_OACTIVE) @@ -482,7 +485,7 @@ ipoib_send(struct ipoib_dev_priv *priv, struct mbuf *mb, void *phead; if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) - while (ipoib_poll_tx(priv)) + while (ipoib_poll_tx(priv, false)) ; /* nothing */ m_adj(mb, sizeof (struct ipoib_pseudoheader)); @@ -762,7 +765,7 @@ void ipoib_drain_cq(struct ipoib_dev_priv *priv) spin_unlock(&priv->drain_lock); spin_lock(&priv->lock); - while (ipoib_poll_tx(priv)) + while (ipoib_poll_tx(priv, true)) ; /* nothing */ spin_unlock(&priv->lock); diff --git a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c index ac3108ed..4aad5bd4 100644 --- a/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -772,17 +772,13 @@ ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb) return 0; } - -static void -_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) +void +ipoib_start_locked(struct ifnet *dev, struct ipoib_dev_priv *priv) { struct mbuf *mb; - if ((dev->if_drv_flags & 
(IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING) - return; + assert_spin_locked(&priv->lock); - spin_lock(&priv->lock); while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) && (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) { IFQ_DRV_DEQUEUE(&dev->if_snd, mb); @@ -791,6 +787,18 @@ _ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) IPOIB_MTAP(dev, mb); ipoib_send_one(priv, mb); } +} + +static void +_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv) +{ + + if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != + IFF_DRV_RUNNING) + return; + + spin_lock(&priv->lock); + ipoib_start_locked(dev, priv); spin_unlock(&priv->lock); } -- 2.21.0 --------------9EB69E496ED5EEE314553BB3--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?bb497012-81c9-f05a-6d1a-6061fa731348>