Date: Mon, 7 Oct 2019 09:02:00 +0000 (UTC) From: Hans Petter Selasky <hselasky@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r353206 - in stable/11/sys/dev/mlx5: . mlx5_core mlx5_en Message-ID: <201910070902.x97920qW072318@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: hselasky Date: Mon Oct 7 09:01:59 2019 New Revision: 353206 URL: https://svnweb.freebsd.org/changeset/base/353206 Log: MFC r352966: Add port module event software counters in mlx5core. While at it, fixup PME based on latest PRM defines. Submitted by: slavash@ Sponsored by: Mellanox Technologies Modified: stable/11/sys/dev/mlx5/device.h stable/11/sys/dev/mlx5/driver.h stable/11/sys/dev/mlx5/mlx5_core/mlx5_eq.c stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/dev/mlx5/device.h ============================================================================== --- stable/11/sys/dev/mlx5/device.h Mon Oct 7 09:01:21 2019 (r353205) +++ stable/11/sys/dev/mlx5/device.h Mon Oct 7 09:01:59 2019 (r353206) @@ -537,7 +537,7 @@ enum { MLX5_MODULE_STATUS_PLUGGED_ENABLED = 0x1, MLX5_MODULE_STATUS_UNPLUGGED = 0x2, MLX5_MODULE_STATUS_ERROR = 0x3, - MLX5_MODULE_STATUS_PLUGGED_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM , }; enum { @@ -549,7 +549,7 @@ enum { MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE = 0x5, MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7, - MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM , }; struct mlx5_eqe_port_module_event { Modified: stable/11/sys/dev/mlx5/driver.h ============================================================================== --- stable/11/sys/dev/mlx5/driver.h Mon Oct 7 09:01:21 2019 (r353205) +++ stable/11/sys/dev/mlx5/driver.h Mon Oct 7 09:01:59 2019 (r353206) @@ -546,6 +546,11 @@ struct mlx5_mr_table { struct radix_tree_root tree; }; +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -598,6 +603,7 @@ struct mlx5_priv { struct list_head ctx_list; spinlock_t ctx_lock; unsigned long pci_dev_data; + struct mlx5_pme_stats pme_stats; }; enum mlx5_device_state { Modified: stable/11/sys/dev/mlx5/mlx5_core/mlx5_eq.c ============================================================================== --- stable/11/sys/dev/mlx5/mlx5_core/mlx5_eq.c Mon Oct 7 09:01:21 2019 (r353205) +++ stable/11/sys/dev/mlx5/mlx5_core/mlx5_eq.c Mon Oct 7 09:01:59 2019 (r353206) @@ -639,9 +639,9 @@ static const char *mlx5_port_module_event_error_type_t { switch (error_type) { case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: - return "Power Budget Exceeded"; + return "Power budget exceeded"; case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE: - return "Long Range for non MLNX cable/module"; + return "Long Range for non MLNX cable"; case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: return "Bus stuck(I2C or data shorted)"; case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: @@ -649,18 +649,11 @@ static const char *mlx5_port_module_event_error_type_t case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: return "Enforce part number list"; case MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE: - return "Unsupported Cable"; + return "Unknown identifier"; case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: return "High Temperature"; case MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED: - return "Cable is shorted"; - case MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED: - return "One or more network ports have been powered " - "down due to insufficient/unadvertised power on " - "the PCIe slot. Please refer to the card's user " - "manual for power specifications or contact " - "Mellanox support."; - + return "Bad or shorted cable/module"; default: return "Unknown error type"; } @@ -686,29 +679,36 @@ static void mlx5_port_module_event(struct mlx5_core_de module_num = (unsigned int)module_event_eqe->module; module_status = (unsigned int)module_event_eqe->module_status & - PORT_MODULE_EVENT_MODULE_STATUS_MASK; + PORT_MODULE_EVENT_MODULE_STATUS_MASK; error_type = (unsigned int)module_event_eqe->error_type & - PORT_MODULE_EVENT_ERROR_TYPE_MASK; + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + if (module_status < MLX5_MODULE_STATUS_NUM) + dev->priv.pme_stats.status_counters[module_status]++; switch (module_status) { case MLX5_MODULE_STATUS_PLUGGED_ENABLED: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged and enabled\n", module_num); + device_printf((&pdev->dev)->bsddev, + "INFO: Module %u, status: plugged and enabled\n", + module_num); break; case MLX5_MODULE_STATUS_UNPLUGGED: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: unplugged\n", module_num); + device_printf((&pdev->dev)->bsddev, + "INFO: Module %u, status: unplugged\n", module_num); break; case MLX5_MODULE_STATUS_ERROR: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: error, %s\n", module_num, mlx5_port_module_event_error_type_to_string(error_type)); + device_printf((&pdev->dev)->bsddev, + "ERROR: Module %u, status: error, %s\n", + module_num, + mlx5_port_module_event_error_type_to_string(error_type)); + if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) + dev->priv.pme_stats.error_counters[error_type]++; break; - case MLX5_MODULE_STATUS_PLUGGED_DISABLED: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged but disabled\n", module_num); - break; - default: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, unknown status\n", module_num); + device_printf((&pdev->dev)->bsddev, + "INFO: Module %u, unknown status\n", module_num); } /* store module status */ if (module_num < MLX5_MAX_PORTS) Modified: stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c ============================================================================== --- stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c Mon Oct 7 09:01:21 2019 (r353205) +++ stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c Mon Oct 7 09:01:59 2019 (r353206) @@ -1226,13 +1226,31 @@ struct mlx5_core_event_handler { void *data); }; +#define MLX5_STATS_DESC(a, b, c, d, e, ...) d, e, + +#define MLX5_PORT_MODULE_ERROR_STATS(m) \ +m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \ +m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \ +m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck(I2C or data shorted)") \ +m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \ +m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \ +m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \ +m(+1, u64, high_temp, "high_temp", "Module High Temperature") \ +m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted") + +static const char *mlx5_pme_err_desc[] = { + MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC) +}; + static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; struct mlx5_priv *priv; device_t bsddev = pdev->dev.bsddev; - int err; + int i,err; + struct sysctl_oid *pme_sysctl_node; + struct sysctl_oid *pme_err_sysctl_node; dev = kzalloc(sizeof(*dev), GFP_KERNEL); priv = &dev->priv; @@ -1264,6 +1282,41 @@ static int init_one(struct pci_dev *pdev, OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0, "Current power value in Watts"); + pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), + OID_AUTO, "pme_stats", CTLFLAG_RD, NULL, + "Port module event statistics"); + if (pme_sysctl_node == NULL) { + err = -ENOMEM; + goto clean_sysctl_ctx; + } + pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, + SYSCTL_CHILDREN(pme_sysctl_node), + OID_AUTO, "errors", CTLFLAG_RD, NULL, + "Port module event error statistics"); + if (pme_err_sysctl_node == NULL) { + err = -ENOMEM; + goto clean_sysctl_ctx; + } + SYSCTL_ADD_U64(&dev->sysctl_ctx, + SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO, + "module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE, + &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED], + 0, "Number of time module plugged"); + SYSCTL_ADD_U64(&dev->sysctl_ctx, + SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO, + "module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE, + &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED], + 0, "Number of time module unplugged"); + for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) { + SYSCTL_ADD_U64(&dev->sysctl_ctx, + SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO, + mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE, + &dev->priv.pme_stats.error_counters[i], + 0, mlx5_pme_err_desc[2 * i + 1]); + } + + INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&dev->pci_status_mutex); @@ -1302,8 +1355,9 @@ clean_health: close_pci: mlx5_pci_close(dev, priv); clean_dev: - sysctl_ctx_free(&dev->sysctl_ctx); mtx_destroy(&dev->dump_lock); +clean_sysctl_ctx: + sysctl_ctx_free(&dev->sysctl_ctx); kfree(dev); return err; } Modified: stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c ============================================================================== --- stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Mon Oct 7 09:01:21 2019 (r353205) +++ stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Mon Oct 7 09:01:59 2019 (r353206) @@ -3269,8 +3269,7 @@ out: } /* Check if module is present before doing an access */ module_status = mlx5_query_module_status(priv->mdev, module_num); - if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED && - module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) { + if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) { error = EINVAL; goto err_i2c; }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201910070902.x97920qW072318>