From owner-svn-src-all@freebsd.org Wed Oct 2 09:29:57 2019 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 8CD34128ACE; Wed, 2 Oct 2019 09:29:57 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) server-signature RSA-PSS (4096 bits) client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 46jrT11zg6z45dl; Wed, 2 Oct 2019 09:29:57 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 22BD5296FD; Wed, 2 Oct 2019 09:29:57 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id x929Tua7004738; Wed, 2 Oct 2019 09:29:56 GMT (envelope-from hselasky@FreeBSD.org) Received: (from hselasky@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id x929Tuwe004733; Wed, 2 Oct 2019 09:29:56 GMT (envelope-from hselasky@FreeBSD.org) Message-Id: <201910020929.x929Tuwe004733@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: hselasky set sender to hselasky@FreeBSD.org using -f From: Hans Petter Selasky Date: Wed, 2 Oct 2019 09:29:56 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r352966 - in head/sys/dev/mlx5: . mlx5_core mlx5_en X-SVN-Group: head X-SVN-Commit-Author: hselasky X-SVN-Commit-Paths: in head/sys/dev/mlx5: . mlx5_core mlx5_en X-SVN-Commit-Revision: 352966 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 02 Oct 2019 09:29:57 -0000 Author: hselasky Date: Wed Oct 2 09:29:55 2019 New Revision: 352966 URL: https://svnweb.freebsd.org/changeset/base/352966 Log: Add port module event software counters in mlx5core. While at it, fixup PME based on latest PRM defines. Submitted by: slavash@ MFC after: 3 days Sponsored by: Mellanox Technologies Modified: head/sys/dev/mlx5/device.h head/sys/dev/mlx5/driver.h head/sys/dev/mlx5/mlx5_core/mlx5_eq.c head/sys/dev/mlx5/mlx5_core/mlx5_main.c head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Modified: head/sys/dev/mlx5/device.h ============================================================================== --- head/sys/dev/mlx5/device.h Wed Oct 2 09:27:56 2019 (r352965) +++ head/sys/dev/mlx5/device.h Wed Oct 2 09:29:55 2019 (r352966) @@ -537,7 +537,7 @@ enum { MLX5_MODULE_STATUS_PLUGGED_ENABLED = 0x1, MLX5_MODULE_STATUS_UNPLUGGED = 0x2, MLX5_MODULE_STATUS_ERROR = 0x3, - MLX5_MODULE_STATUS_PLUGGED_DISABLED = 0x4, + MLX5_MODULE_STATUS_NUM , }; enum { @@ -549,7 +549,7 @@ enum { MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE = 0x5, MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE = 0x6, MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED = 0x7, - MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED = 0xc, + MLX5_MODULE_EVENT_ERROR_NUM , }; struct mlx5_eqe_port_module_event { Modified: head/sys/dev/mlx5/driver.h ============================================================================== --- head/sys/dev/mlx5/driver.h Wed Oct 2 09:27:56 2019 (r352965) +++ head/sys/dev/mlx5/driver.h Wed Oct 2 09:29:55 2019 (r352966) @@ -569,6 +569,11 @@ struct mlx5_rl_table { }; #endif +struct mlx5_pme_stats { + u64 status_counters[MLX5_MODULE_STATUS_NUM]; + u64 error_counters[MLX5_MODULE_EVENT_ERROR_NUM]; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -624,6 +629,7 @@ struct mlx5_priv { #ifdef RATELIMIT struct mlx5_rl_table rl_table; #endif + struct mlx5_pme_stats pme_stats; }; enum mlx5_device_state { Modified: head/sys/dev/mlx5/mlx5_core/mlx5_eq.c ============================================================================== --- head/sys/dev/mlx5/mlx5_core/mlx5_eq.c Wed Oct 2 09:27:56 2019 (r352965) +++ head/sys/dev/mlx5/mlx5_core/mlx5_eq.c Wed Oct 2 09:29:55 2019 (r352966) @@ -639,9 +639,9 @@ static const char *mlx5_port_module_event_error_type_t { switch (error_type) { case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED: - return "Power Budget Exceeded"; + return "Power budget exceeded"; case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX_CABLE_MODULE: - return "Long Range for non MLNX cable/module"; + return "Long Range for non MLNX cable"; case MLX5_MODULE_EVENT_ERROR_BUS_STUCK: return "Bus stuck(I2C or data shorted)"; case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT: @@ -649,18 +649,11 @@ static const char *mlx5_port_module_event_error_type_t case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST: return "Enforce part number list"; case MLX5_MODULE_EVENT_ERROR_UNSUPPORTED_CABLE: - return "Unsupported Cable"; + return "Unknown identifier"; case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE: return "High Temperature"; case MLX5_MODULE_EVENT_ERROR_CABLE_IS_SHORTED: - return "Cable is shorted"; - case MLX5_MODULE_EVENT_ERROR_PCIE_SYSTEM_POWER_SLOT_EXCEEDED: - return "One or more network ports have been powered " - "down due to insufficient/unadvertised power on " - "the PCIe slot. Please refer to the card's user " - "manual for power specifications or contact " - "Mellanox support."; - + return "Bad or shorted cable/module"; default: return "Unknown error type"; } @@ -686,29 +679,36 @@ static void mlx5_port_module_event(struct mlx5_core_de module_num = (unsigned int)module_event_eqe->module; module_status = (unsigned int)module_event_eqe->module_status & - PORT_MODULE_EVENT_MODULE_STATUS_MASK; + PORT_MODULE_EVENT_MODULE_STATUS_MASK; error_type = (unsigned int)module_event_eqe->error_type & - PORT_MODULE_EVENT_ERROR_TYPE_MASK; + PORT_MODULE_EVENT_ERROR_TYPE_MASK; + if (module_status < MLX5_MODULE_STATUS_NUM) + dev->priv.pme_stats.status_counters[module_status]++; switch (module_status) { case MLX5_MODULE_STATUS_PLUGGED_ENABLED: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged and enabled\n", module_num); + device_printf((&pdev->dev)->bsddev, + "INFO: Module %u, status: plugged and enabled\n", + module_num); break; case MLX5_MODULE_STATUS_UNPLUGGED: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: unplugged\n", module_num); + device_printf((&pdev->dev)->bsddev, + "INFO: Module %u, status: unplugged\n", module_num); break; case MLX5_MODULE_STATUS_ERROR: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: error, %s\n", module_num, mlx5_port_module_event_error_type_to_string(error_type)); + device_printf((&pdev->dev)->bsddev, + "ERROR: Module %u, status: error, %s\n", + module_num, + mlx5_port_module_event_error_type_to_string(error_type)); + if (error_type < MLX5_MODULE_EVENT_ERROR_NUM) + dev->priv.pme_stats.error_counters[error_type]++; break; - case MLX5_MODULE_STATUS_PLUGGED_DISABLED: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, status: plugged but disabled\n", module_num); - break; - default: - device_printf((&pdev->dev)->bsddev, "INFO: ""Module %u, unknown status\n", module_num); + device_printf((&pdev->dev)->bsddev, + "INFO: Module %u, unknown status\n", module_num); } /* store module status */ if (module_num < MLX5_MAX_PORTS) Modified: head/sys/dev/mlx5/mlx5_core/mlx5_main.c ============================================================================== --- head/sys/dev/mlx5/mlx5_core/mlx5_main.c Wed Oct 2 09:27:56 2019 (r352965) +++ head/sys/dev/mlx5/mlx5_core/mlx5_main.c Wed Oct 2 09:29:55 2019 (r352966) @@ -1244,13 +1244,31 @@ struct mlx5_core_event_handler { void *data); }; +#define MLX5_STATS_DESC(a, b, c, d, e, ...) d, e, + +#define MLX5_PORT_MODULE_ERROR_STATS(m) \ +m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \ +m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \ +m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck(I2C or data shorted)") \ +m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \ +m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \ +m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \ +m(+1, u64, high_temp, "high_temp", "Module High Temperature") \ +m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted") + +static const char *mlx5_pme_err_desc[] = { + MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC) +}; + static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; struct mlx5_priv *priv; device_t bsddev = pdev->dev.bsddev; - int err; + int i,err; + struct sysctl_oid *pme_sysctl_node; + struct sysctl_oid *pme_err_sysctl_node; dev = kzalloc(sizeof(*dev), GFP_KERNEL); priv = &dev->priv; @@ -1282,6 +1300,41 @@ static int init_one(struct pci_dev *pdev, OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0, "Current power value in Watts"); + pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), + OID_AUTO, "pme_stats", CTLFLAG_RD, NULL, + "Port module event statistics"); + if (pme_sysctl_node == NULL) { + err = -ENOMEM; + goto clean_sysctl_ctx; + } + pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx, + SYSCTL_CHILDREN(pme_sysctl_node), + OID_AUTO, "errors", CTLFLAG_RD, NULL, + "Port module event error statistics"); + if (pme_err_sysctl_node == NULL) { + err = -ENOMEM; + goto clean_sysctl_ctx; + } + SYSCTL_ADD_U64(&dev->sysctl_ctx, + SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO, + "module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE, + &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED], + 0, "Number of time module plugged"); + SYSCTL_ADD_U64(&dev->sysctl_ctx, + SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO, + "module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE, + &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED], + 0, "Number of time module unplugged"); + for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) { + SYSCTL_ADD_U64(&dev->sysctl_ctx, + SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO, + mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE, + &dev->priv.pme_stats.error_counters[i], + 0, mlx5_pme_err_desc[2 * i + 1]); + } + + INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); mutex_init(&dev->pci_status_mutex); @@ -1320,8 +1373,9 @@ clean_health: close_pci: mlx5_pci_close(dev, priv); clean_dev: - sysctl_ctx_free(&dev->sysctl_ctx); mtx_destroy(&dev->dump_lock); +clean_sysctl_ctx: + sysctl_ctx_free(&dev->sysctl_ctx); kfree(dev); return err; } Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c ============================================================================== --- head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Wed Oct 2 09:27:56 2019 (r352965) +++ head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Wed Oct 2 09:29:55 2019 (r352966) @@ -3389,8 +3389,7 @@ out: } /* Check if module is present before doing an access */ module_status = mlx5_query_module_status(priv->mdev, module_num); - if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED && - module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) { + if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED) { error = EINVAL; goto err_i2c; }