Date: Fri, 23 Mar 2018 18:20:43 +0000 (UTC) From: Hans Petter Selasky <hselasky@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r331449 - in head/sys/dev/mlx5: . mlx5_core Message-ID: <201803231820.w2NIKhrD056107@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: hselasky Date: Fri Mar 23 18:20:42 2018 New Revision: 331449 URL: https://svnweb.freebsd.org/changeset/base/331449 Log: Handle software reset of firmware in error flow in mlx5core. Some mlx5 adapter firmware allows the driver to reset the firmware in the event of an error. When a software reset is issued on any physical function all PFs enter reset state. This is a recoverable condition. The existing recovery flow was designed to allow the recovery of a VF after a PF driver reload. This patch expands the scope of that flow to recover PFs or VFs after a SW reset has been issued. When a software reset is issued the following occurs: 1. The NIC interface mode is set to SW_RESET (7) while the reset is in progress. 2. Once the reset completes the NIC interface mode is set to NIC disabled (1). After the reset has been issued (added in a subsequent patch) the health poll for other functions will detect that the NIC interface state has been set to disabled. This will cause it to enter the existing recovery flow. If the PCI is still working (meaning it doesn't return 0xff on all reads) it means recovery can proceed immediately instead of waiting 60 seconds. The error detetion has also been refactored to avoid incorrect or misleading log messages. Submitted by: slavash@ MFC after: 1 week Sponsored by: Mellanox Technologies Modified: head/sys/dev/mlx5/driver.h head/sys/dev/mlx5/mlx5_core/mlx5_health.c Modified: head/sys/dev/mlx5/driver.h ============================================================================== --- head/sys/dev/mlx5/driver.h Fri Mar 23 18:15:01 2018 (r331448) +++ head/sys/dev/mlx5/driver.h Fri Mar 23 18:20:42 2018 (r331449) @@ -492,7 +492,7 @@ struct mlx5_core_health { struct timer_list timer; u32 prev; int miss_counter; - bool sick; + u32 fatal_error; /* wq spinlock to synchronize draining */ spinlock_t wq_lock; struct workqueue_struct *wq; Modified: head/sys/dev/mlx5/mlx5_core/mlx5_health.c ============================================================================== --- head/sys/dev/mlx5/mlx5_core/mlx5_health.c Fri Mar 23 18:15:01 2018 (r331448) +++ head/sys/dev/mlx5/mlx5_core/mlx5_health.c Fri Mar 23 18:20:42 2018 (r331449) @@ -30,6 +30,7 @@ #include <linux/random.h> #include <linux/vmalloc.h> #include <linux/hardirq.h> +#include <linux/delay.h> #include <dev/mlx5/driver.h> #include <dev/mlx5/mlx5_ifc.h> #include "mlx5_core.h" @@ -41,7 +42,7 @@ enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, MLX5_NIC_IFC_NO_DRAM_NIC = 2, - MLX5_NIC_IFC_INVALID = 3, + MLX5_NIC_IFC_SW_RESET = 7, }; enum { @@ -49,9 +50,17 @@ enum { MLX5_DROP_NEW_RECOVERY_WORK, }; -static u8 get_nic_state(struct mlx5_core_dev *dev) +enum { + MLX5_SENSOR_NO_ERR = 0, + MLX5_SENSOR_PCI_COMM_ERR = 1, + MLX5_SENSOR_PCI_ERR = 2, + MLX5_SENSOR_NIC_DISABLED = 3, + MLX5_SENSOR_NIC_SW_RESET = 4, +}; + +static u8 get_nic_mode(struct mlx5_core_dev *dev) { - return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; } static void mlx5_trigger_cmd_completions(struct mlx5_core_dev *dev) @@ -77,20 +86,39 @@ no_trig: spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); } -static int in_fatal(struct mlx5_core_dev *dev) +static bool sensor_pci_no_comm(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct mlx5_health_buffer __iomem *h = health->health; + bool err = ioread32be(&h->fw_ver) == 0xffffffff; - if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) - return 1; + return err; +} - if (ioread32be(&h->fw_ver) == 0xffffffff) - return 1; +static bool sensor_nic_disabled(struct mlx5_core_dev *dev) +{ + return get_nic_mode(dev) == MLX5_NIC_IFC_DISABLED; +} - return 0; +static bool sensor_nic_sw_reset(struct mlx5_core_dev *dev) +{ + return get_nic_mode(dev) == MLX5_NIC_IFC_SW_RESET; } +static u32 check_fatal_sensors(struct mlx5_core_dev *dev) +{ + if (sensor_pci_no_comm(dev)) + return MLX5_SENSOR_PCI_COMM_ERR; + if (pci_channel_offline(dev->pdev)) + return MLX5_SENSOR_PCI_ERR; + if (sensor_nic_disabled(dev)) + return MLX5_SENSOR_NIC_DISABLED; + if (sensor_nic_sw_reset(dev)) + return MLX5_SENSOR_NIC_SW_RESET; + + return MLX5_SENSOR_NO_ERR; +} + void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) { mutex_lock(&dev->intf_state_mutex); @@ -101,7 +129,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, if (!force) mlx5_core_err(dev, "internal state error detected\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { + if (check_fatal_sensors(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_trigger_cmd_completions(dev); } @@ -116,57 +144,80 @@ unlock: static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) { - u8 nic_state = get_nic_state(dev); + u8 nic_mode = get_nic_mode(dev); - switch (nic_state) { - case MLX5_NIC_IFC_FULL: - mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n"); - break; - - case MLX5_NIC_IFC_DISABLED: - mlx5_core_warn(dev, "starting teardown\n"); - break; - - case MLX5_NIC_IFC_NO_DRAM_NIC: - mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); - break; - default: - mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", - nic_state); + if (nic_mode == MLX5_NIC_IFC_SW_RESET) { + /* The IFC mode field is 3 bits, so it will read 0x7 in two cases: + * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded + * and this is a VF), this is not recoverable by SW reset. + * Logging of this is handled elsewhere. + * 2. FW reset has been issued by another function, driver can + * be reloaded to recover after the mode switches to + * MLX5_NIC_IFC_DISABLED. + */ + if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) + mlx5_core_warn(dev, "NIC SW reset is already progress\n"); + else + mlx5_core_warn(dev, "Communication with FW over the PCI link is down\n"); + } else { + mlx5_core_warn(dev, "NIC mode %d\n", nic_mode); } mlx5_disable_device(dev); } +#define MLX5_FW_RESET_WAIT_MS 1000 +#define MLX5_NIC_STATE_POLL_MS 5 static void health_recover(struct work_struct *work) { + unsigned long end = jiffies + msecs_to_jiffies(MLX5_FW_RESET_WAIT_MS); struct mlx5_core_health *health; struct delayed_work *dwork; struct mlx5_core_dev *dev; struct mlx5_priv *priv; - u8 nic_state; + u8 nic_mode; dwork = container_of(work, struct delayed_work, work); health = container_of(dwork, struct mlx5_core_health, recover_work); priv = container_of(health, struct mlx5_priv, health); dev = container_of(priv, struct mlx5_core_dev, priv); - nic_state = get_nic_state(dev); - if (nic_state == MLX5_NIC_IFC_INVALID) { - dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n"); + if (sensor_pci_no_comm(dev)) { + dev_err(&dev->pdev->dev, "health recovery flow aborted, PCI reads still not working\n"); return; } + nic_mode = get_nic_mode(dev); + while (nic_mode != MLX5_NIC_IFC_DISABLED && + !time_after(jiffies, end)) { + msleep(MLX5_NIC_STATE_POLL_MS); + nic_mode = get_nic_mode(dev); + } + + if (nic_mode != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "health recovery flow aborted, unexpected NIC IFC mode %d.\n", + nic_mode); + return; + } + dev_err(&dev->pdev->dev, "starting health recovery flow\n"); mlx5_recover_device(dev); } /* How much time to wait until health resetting the driver (in msecs) */ #define MLX5_RECOVERY_DELAY_MSECS 60000 +#define MLX5_RECOVERY_NO_DELAY 0 +static unsigned long get_recovery_delay(struct mlx5_core_dev *dev) +{ + return dev->priv.health.fatal_error == MLX5_SENSOR_PCI_ERR || + dev->priv.health.fatal_error == MLX5_SENSOR_PCI_COMM_ERR ? + MLX5_RECOVERY_DELAY_MSECS : MLX5_RECOVERY_NO_DELAY; +} + static void health_care(struct work_struct *work) { - unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); struct mlx5_core_health *health; + unsigned long recover_delay; struct mlx5_core_dev *dev; struct mlx5_priv *priv; unsigned long flags; @@ -176,6 +227,7 @@ static void health_care(struct work_struct *work) dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); + recover_delay = msecs_to_jiffies(get_recovery_delay(dev)); spin_lock_irqsave(&health->wq_lock, flags); if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) @@ -272,6 +324,7 @@ static void poll_health(unsigned long data) { struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; struct mlx5_core_health *health = &dev->priv.health; + u32 fatal_error; u32 count; if (dev->state != MLX5_DEVICE_STATE_UP) @@ -292,8 +345,11 @@ static void poll_health(unsigned long data) print_health_info(dev); } - if (in_fatal(dev) && !health->sick) { - health->sick = true; + fatal_error = check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; print_health_info(dev); mlx5_trigger_health_work(dev); } @@ -307,7 +363,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; init_timer(&health->timer); - health->sick = 0; + health->fatal_error = MLX5_SENSOR_NO_ERR; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803231820.w2NIKhrD056107>