Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 14 Aug 2018 11:24:14 +0000 (UTC)
From:      Hans Petter Selasky <hselasky@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r337743 - in stable/10/sys/dev/mlx5: mlx5_core mlx5_en
Message-ID:  <201808141124.w7EBOExD025720@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: hselasky
Date: Tue Aug 14 11:24:14 2018
New Revision: 337743
URL: https://svnweb.freebsd.org/changeset/base/337743

Log:
  Enter error state when handling bad device in mlx5core and add checks
  for error state to mlx5en(4) to make live migration work.
  
  This is a direct commit.
  
  Sponsored by:	Mellanox Technologies

Modified:
  stable/10/sys/dev/mlx5/mlx5_core/mlx5_health.c
  stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c

Modified: stable/10/sys/dev/mlx5/mlx5_core/mlx5_health.c
==============================================================================
--- stable/10/sys/dev/mlx5/mlx5_core/mlx5_health.c	Tue Aug 14 11:19:04 2018	(r337742)
+++ stable/10/sys/dev/mlx5/mlx5_core/mlx5_health.c	Tue Aug 14 11:24:14 2018	(r337743)
@@ -56,10 +56,13 @@ static void health_care(struct work_struct *work)
 		priv = container_of(health, struct mlx5_priv, health);
 		dev = container_of(priv, struct mlx5_core_dev, priv);
 		mlx5_core_warn(dev, "handling bad device here\n");
-		/* nothing yet */
+
 		spin_lock_irq(&health_lock);
 		list_del_init(&health->list);
 		spin_unlock_irq(&health_lock);
+
+		/* enter error state */
+		mlx5_enter_error_state(dev);
 	}
 }
 

Modified: stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
==============================================================================
--- stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c	Tue Aug 14 11:19:04 2018	(r337742)
+++ stable/10/sys/dev/mlx5/mlx5_en/mlx5_en_main.c	Tue Aug 14 11:24:14 2018	(r337743)
@@ -921,8 +921,11 @@ mlx5e_close_rq(struct mlx5e_rq *rq)
 static void
 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
 {
+	struct mlx5_core_dev *mdev = rq->channel->priv->mdev;
+
 	/* wait till RQ is empty */
-	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
+	while (!mlx5_wq_ll_is_empty(&rq->wq) &&
+		(mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
 		msleep(4);
 		rq->cq.mcq.comp(&rq->cq.mcq);
 	}
@@ -1264,6 +1267,7 @@ void
 mlx5e_drain_sq(struct mlx5e_sq *sq)
 {
 	int error;
+	struct mlx5_core_dev *mdev = sq->priv->mdev;
 
 	/*
 	 * Check if already stopped.
@@ -1296,7 +1300,8 @@ mlx5e_drain_sq(struct mlx5e_sq *sq)
 	/* wait till SQ is empty or link is down */
 	mtx_lock(&sq->lock);
 	while (sq->cc != sq->pc &&
-	    (sq->priv->media_status_last & IFM_ACTIVE) != 0) {
+	    (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
+	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
 		mtx_unlock(&sq->lock);
 		msleep(1);
 		sq->cq.mcq.comp(&sq->cq.mcq);
@@ -1313,7 +1318,8 @@ mlx5e_drain_sq(struct mlx5e_sq *sq)
 
 	/* wait till SQ is empty */
 	mtx_lock(&sq->lock);
-	while (sq->cc != sq->pc) {
+	while (sq->cc != sq->pc &&
+	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
 		mtx_unlock(&sq->lock);
 		msleep(1);
 		sq->cq.mcq.comp(&sq->cq.mcq);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201808141124.w7EBOExD025720>