Date: Mon, 12 Jul 2021 13:09:57 GMT
From: Hans Petter Selasky <hselasky@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject: git: 8abf5ac0e6dd - main - mlx5ib: Implement support for enabling and disabling RoCE ECN.
Message-ID: <202107121309.16CD9vsu095478@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by hselasky: URL: https://cgit.FreeBSD.org/src/commit/?id=8abf5ac0e6ddaeddf49cf39193bbe0c3ebf7209b commit 8abf5ac0e6ddaeddf49cf39193bbe0c3ebf7209b Author: Hans Petter Selasky <hselasky@FreeBSD.org> AuthorDate: 2021-06-16 13:01:56 +0000 Commit: Hans Petter Selasky <hselasky@FreeBSD.org> CommitDate: 2021-07-12 12:22:33 +0000 mlx5ib: Implement support for enabling and disabling RoCE ECN. RoCE is short for Remote direct memory access over Converged Ethernet. ECN is short for Explicit Congestion Notification. MFC after: 1 week Reviewed by: kib Sponsored by: Mellanox Technologies // NVIDIA Networking --- sys/dev/mlx5/cmd.h | 4 ++ sys/dev/mlx5/mlx5_core/mlx5_cmd.c | 23 ++++++++++ sys/dev/mlx5/mlx5_ib/mlx5_ib.h | 39 +++++++++++++++++ sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c | 84 +++++++++++++++++++++++++++++++++++++ 4 files changed, 150 insertions(+) diff --git a/sys/dev/mlx5/cmd.h b/sys/dev/mlx5/cmd.h index 674a8ab44acd..babcaac58ee3 100644 --- a/sys/dev/mlx5/cmd.h +++ b/sys/dev/mlx5/cmd.h @@ -50,4 +50,8 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, void *out, int out_size); int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev, void *in, int in_size); +int mlx5_cmd_query_cong_status(struct mlx5_core_dev *dev, int cong_point, + int prio, void *out, int out_size); +int mlx5_cmd_modify_cong_status(struct mlx5_core_dev *mdev, + void *in, int in_size); #endif /* MLX5_CMD_H */ diff --git a/sys/dev/mlx5/mlx5_core/mlx5_cmd.c b/sys/dev/mlx5/mlx5_core/mlx5_cmd.c index c6cc3fee8c43..c4e8b32ffcd5 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_cmd.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_cmd.c @@ -1668,3 +1668,26 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_cmd_modify_cong_params); + +int mlx5_cmd_query_cong_status(struct mlx5_core_dev *dev, int cong_point, + int prio, void *out, int out_size) +{ + u32 
in[MLX5_ST_SZ_DW(query_cong_status_in)] = { }; + + MLX5_SET(query_cong_status_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATUS); + MLX5_SET(query_cong_status_in, in, priority, prio); + MLX5_SET(query_cong_status_in, in, cong_protocol, cong_point); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); +} +EXPORT_SYMBOL(mlx5_cmd_query_cong_status); + +int mlx5_cmd_modify_cong_status(struct mlx5_core_dev *dev, + void *in, int in_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_cong_status_out)] = { }; + + return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_cmd_modify_cong_status); diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h index 0ac5368ca3b2..49f6e87868ff 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib.h +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib.h @@ -678,6 +678,44 @@ struct mlx5_roce { #define MLX5_IB_CONG_STATS_NUM (0 MLX5_IB_CONG_STATS(MLX5_IB_STATS_COUNT)) +#define MLX5_IB_CONG_STATUS(m) \ + /* ECN RP */ \ + m(+1, u64, rp_0_enable, "rp_0_enable", "Enable reaction point, priority 0", MLX5_IB_RROCE_ECN_RP, 0, enable) \ + m(+1, u64, rp_1_enable, "rp_1_enable", "Enable reaction point, priority 1", MLX5_IB_RROCE_ECN_RP, 1, enable) \ + m(+1, u64, rp_2_enable, "rp_2_enable", "Enable reaction point, priority 2", MLX5_IB_RROCE_ECN_RP, 2, enable) \ + m(+1, u64, rp_3_enable, "rp_3_enable", "Enable reaction point, priority 3", MLX5_IB_RROCE_ECN_RP, 3, enable) \ + m(+1, u64, rp_4_enable, "rp_4_enable", "Enable reaction point, priority 4", MLX5_IB_RROCE_ECN_RP, 4, enable) \ + m(+1, u64, rp_5_enable, "rp_5_enable", "Enable reaction point, priority 5", MLX5_IB_RROCE_ECN_RP, 5, enable) \ + m(+1, u64, rp_6_enable, "rp_6_enable", "Enable reaction point, priority 6", MLX5_IB_RROCE_ECN_RP, 6, enable) \ + m(+1, u64, rp_7_enable, "rp_7_enable", "Enable reaction point, priority 7", MLX5_IB_RROCE_ECN_RP, 7, enable) \ + m(+1, u64, rp_8_enable, "rp_8_enable", "Enable reaction point, priority 8", MLX5_IB_RROCE_ECN_RP, 8, enable) \ + m(+1, u64, 
rp_9_enable, "rp_9_enable", "Enable reaction point, priority 9", MLX5_IB_RROCE_ECN_RP, 9, enable) \ + m(+1, u64, rp_10_enable, "rp_10_enable", "Enable reaction point, priority 10", MLX5_IB_RROCE_ECN_RP, 10, enable) \ + m(+1, u64, rp_11_enable, "rp_11_enable", "Enable reaction point, priority 11", MLX5_IB_RROCE_ECN_RP, 11, enable) \ + m(+1, u64, rp_12_enable, "rp_12_enable", "Enable reaction point, priority 12", MLX5_IB_RROCE_ECN_RP, 12, enable) \ + m(+1, u64, rp_13_enable, "rp_13_enable", "Enable reaction point, priority 13", MLX5_IB_RROCE_ECN_RP, 13, enable) \ + m(+1, u64, rp_14_enable, "rp_14_enable", "Enable reaction point, priority 14", MLX5_IB_RROCE_ECN_RP, 14, enable) \ + m(+1, u64, rp_15_enable, "rp_15_enable", "Enable reaction point, priority 15", MLX5_IB_RROCE_ECN_RP, 15, enable) \ + /* ECN NP */ \ + m(+1, u64, np_0_enable, "np_0_enable", "Enable notification point, priority 0", MLX5_IB_RROCE_ECN_NP, 0, enable) \ + m(+1, u64, np_1_enable, "np_1_enable", "Enable notification point, priority 1", MLX5_IB_RROCE_ECN_NP, 1, enable) \ + m(+1, u64, np_2_enable, "np_2_enable", "Enable notification point, priority 2", MLX5_IB_RROCE_ECN_NP, 2, enable) \ + m(+1, u64, np_3_enable, "np_3_enable", "Enable notification point, priority 3", MLX5_IB_RROCE_ECN_NP, 3, enable) \ + m(+1, u64, np_4_enable, "np_4_enable", "Enable notification point, priority 4", MLX5_IB_RROCE_ECN_NP, 4, enable) \ + m(+1, u64, np_5_enable, "np_5_enable", "Enable notification point, priority 5", MLX5_IB_RROCE_ECN_NP, 5, enable) \ + m(+1, u64, np_6_enable, "np_6_enable", "Enable notification point, priority 6", MLX5_IB_RROCE_ECN_NP, 6, enable) \ + m(+1, u64, np_7_enable, "np_7_enable", "Enable notification point, priority 7", MLX5_IB_RROCE_ECN_NP, 7, enable) \ + m(+1, u64, np_8_enable, "np_8_enable", "Enable notification point, priority 8", MLX5_IB_RROCE_ECN_NP, 8, enable) \ + m(+1, u64, np_9_enable, "np_9_enable", "Enable notification point, priority 9", MLX5_IB_RROCE_ECN_NP, 9, enable) \ + m(+1, 
u64, np_10_enable, "np_10_enable", "Enable notification point, priority 10", MLX5_IB_RROCE_ECN_NP, 10, enable) \ + m(+1, u64, np_11_enable, "np_11_enable", "Enable notification point, priority 11", MLX5_IB_RROCE_ECN_NP, 11, enable) \ + m(+1, u64, np_12_enable, "np_12_enable", "Enable notification point, priority 12", MLX5_IB_RROCE_ECN_NP, 12, enable) \ + m(+1, u64, np_13_enable, "np_13_enable", "Enable notification point, priority 13", MLX5_IB_RROCE_ECN_NP, 13, enable) \ + m(+1, u64, np_14_enable, "np_14_enable", "Enable notification point, priority 14", MLX5_IB_RROCE_ECN_NP, 14, enable) \ + m(+1, u64, np_15_enable, "np_15_enable", "Enable notification point, priority 15", MLX5_IB_RROCE_ECN_NP, 15, enable) \ + +#define MLX5_IB_CONG_STATUS_NUM (0 MLX5_IB_CONG_STATUS(MLX5_IB_STATS_COUNT)) + struct mlx5_ib_congestion { struct sysctl_ctx_list ctx; struct sx lock; @@ -687,6 +725,7 @@ struct mlx5_ib_congestion { struct { MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_VAR) MLX5_IB_CONG_STATS(MLX5_IB_STATS_VAR) + MLX5_IB_CONG_STATUS(MLX5_IB_STATS_VAR) }; }; }; diff --git a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c index 0fc6694bde82..85ba77362cca 100644 --- a/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c +++ b/sys/dev/mlx5/mlx5_ib/mlx5_ib_cong.c @@ -33,6 +33,10 @@ static const char *mlx5_ib_cong_params_desc[] = { MLX5_IB_CONG_PARAMS(MLX5_IB_STATS_DESC) }; +static const char *mlx5_ib_cong_status_desc[] = { + MLX5_IB_CONG_STATUS(MLX5_IB_STATS_DESC) +}; + static const char *mlx5_ib_cong_stats_desc[] = { MLX5_IB_CONG_STATS(MLX5_IB_STATS_DESC) }; @@ -346,6 +350,72 @@ done: return (error); } +static int +mlx5_ib_get_all_cc_status(struct mlx5_ib_dev *dev) +{ + const int outlen = MLX5_ST_SZ_BYTES(query_cong_status_out); + uint32_t out[MLX5_ST_SZ_DW(query_cong_status_out)] = {}; + int error; + +#define MLX5_IB_CONG_STATUS_READ(a,b,c,d,e,node,prio,field) do { \ + error = mlx5_cmd_query_cong_status(dev->mdev, node, prio, out, outlen); \ + if (error) \ + goto done; \ + 
dev->congestion.c = MLX5_GET(query_cong_status_out, out, field); \ +} while (0); + + MLX5_IB_CONG_STATUS(MLX5_IB_CONG_STATUS_READ); +done: + return (error); +} + +static int +mlx5_ib_cong_status_handler(SYSCTL_HANDLER_ARGS) +{ + const int inlen = MLX5_ST_SZ_BYTES(modify_cong_status_in); + uint32_t in[MLX5_ST_SZ_DW(modify_cong_status_in)] = {}; + struct mlx5_ib_dev *dev = arg1; + u64 value; + int error; + + CONG_LOCK(dev); + value = dev->congestion.arg[arg2]; + if (req != NULL) { + error = sysctl_handle_64(oidp, &value, 0, req); + /* convert value into a boolean */ + value = value ? 1 : 0; + if (error || req->newptr == NULL || + value == dev->congestion.arg[arg2]) + goto done; + + /* assign new binary value */ + dev->congestion.arg[arg2] = value; + } else { + error = 0; + } + if (!MLX5_CAP_GEN(dev->mdev, cc_modify_allowed)) + error = EPERM; + else switch (arg2) { +#define MLX5_IB_CONG_STATUS_WRITE(a,b,c,d,e,node,prio,field) \ + case MLX5_IB_INDEX(c): \ + MLX5_SET(modify_cong_status_in, in, opcode, \ + MLX5_CMD_OP_MODIFY_CONG_STATUS); \ + MLX5_SET(modify_cong_status_in, in, priority, prio); \ + MLX5_SET(modify_cong_status_in, in, cong_protocol, node); \ + MLX5_SET(modify_cong_status_in, in, field, value); \ + error = -mlx5_cmd_modify_cong_status(dev->mdev, in, inlen); \ + break; + MLX5_IB_CONG_STATUS(MLX5_IB_CONG_STATUS_WRITE) + default: + error = EINVAL; + break; + } +done: + CONG_UNLOCK(dev); + + return (error); +} + #define MLX5_GET_UNALIGNED_64(t,p,f) \ (((u64)MLX5_GET(t,p,f##_high) << 32) | MLX5_GET(t,p,f##_low)) @@ -422,6 +492,10 @@ mlx5_ib_init_congestion(struct mlx5_ib_dev *dev) if (err) return (err); + err = mlx5_ib_get_all_cc_status(dev); + if (err) + return (err); + parent = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(dev->ib_dev.dev.kobj.oidp), OID_AUTO, "cong", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Congestion control"); @@ -445,6 +519,16 @@ mlx5_ib_init_congestion(struct mlx5_ib_dev *dev) mlx5_ib_cong_params_desc[2 * x + 1]); } + for (x = 0; x != 
MLX5_IB_CONG_STATUS_NUM; x++) { + SYSCTL_ADD_PROC(ctx, + SYSCTL_CHILDREN(node), OID_AUTO, + mlx5_ib_cong_status_desc[2 * x], + CTLTYPE_U64 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + dev, x + MLX5_IB_CONG_PARAMS_NUM + MLX5_IB_CONG_STATS_NUM, + &mlx5_ib_cong_status_handler, "QU", + mlx5_ib_cong_status_desc[2 * x + 1]); + } + node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(parent), OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics");
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202107121309.16CD9vsu095478>