Skip site navigation (1) Skip section navigation (2)
Date:      Wed, 8 May 2019 10:23:33 +0000 (UTC)
From:      Hans Petter Selasky <hselasky@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r347246 - in head/sys: compat/linuxkpi/common/include/linux conf dev/mlx5/mlx5_en modules/mlx5en
Message-ID:  <201905081023.x48ANXcU038769@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: hselasky
Date: Wed May  8 10:23:33 2019
New Revision: 347246
URL: https://svnweb.freebsd.org/changeset/base/347246

Log:
  Add support for Dynamic Interrupt Moderation, DIM, in mlx5en(4).
  
  Add support for DIM based on Linux,
  with some minor adaptions specific to FreeBSD.
  
  Linux commit
  f97c3dc3c0e8d23a5c4357d182afeef4c67f5c33
  
  MFC after:	3 days
  Sponsored by:	Mellanox Technologies

Added:
  head/sys/compat/linuxkpi/common/include/linux/net_dim.h   (contents, props changed)
  head/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/dev/mlx5/mlx5_en/en.h
  head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
  head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
  head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
  head/sys/modules/mlx5en/Makefile

Added: head/sys/compat/linuxkpi/common/include/linux/net_dim.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/compat/linuxkpi/common/include/linux/net_dim.h	Wed May  8 10:23:33 2019	(r347246)
@@ -0,0 +1,410 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
+ *
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017-2018, Broadcom Limited. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+/* This file implements Dynamic Interrupt Moderation, DIM */
+
+#ifndef NET_DIM_H
+#define	NET_DIM_H
+
+#include <asm/types.h>
+
+#include <linux/workqueue.h>
+#include <linux/ktime.h>
+
+struct net_dim_cq_moder {	/* one interrupt moderation setting */
+	u16	usec;		/* period; mlx5en writes it to cq_period */
+	u16	pkts;		/* packet limit; mlx5en writes it to cq_max_count */
+	u8	cq_period_mode;	/* NET_DIM_CQ_PERIOD_MODE_*; set by net_dim_get_profile() */
+};
+
+struct net_dim_sample {		/* counter snapshot filled in by net_dim_sample() */
+	ktime_t	time;		/* ktime_get() at sampling time */
+	u32	pkt_ctr;	/* packet counter, truncated to 32 bits */
+	u32	byte_ctr;	/* byte counter, truncated to 32 bits */
+	u16	event_ctr;	/* net_dim event counter at sampling time */
+};
+
+struct net_dim_stats {			/* rates over one measurement interval */
+	int	ppms;			/* packets per msec, rounded up */
+	int	bpms;			/* bytes per msec, rounded up */
+	int	epms;			/* events per msec, rounded up */
+};
+
+struct net_dim {			/* Adaptive Moderation */
+	u8	state;			/* NET_DIM_START_MEASURE and friends */
+	struct net_dim_stats prev_stats;	/* baseline for the next comparison */
+	struct net_dim_sample start_sample;	/* start of the current measurement */
+	struct work_struct work;	/* scheduled when profile_ix has changed */
+	u16	event_ctr;		/* incremented on every net_dim() call */
+	u8	profile_ix;		/* index into net_dim_profile[mode][] */
+	u8	mode;			/* NET_DIM_CQ_PERIOD_MODE_* */
+	u8	tune_state;		/* NET_DIM_PARKING_* or NET_DIM_GOING_* */
+	u8	steps_right;		/* right steps since turning right or parking */
+	u8	steps_left;		/* left steps since turning left or parking */
+	u8	tired;			/* steps since parking on top; counts down while tired */
+};
+
+enum {	/* CQ period modes; the first two select a row of net_dim_profile */
+	NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
+	NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
+	NET_DIM_CQ_PERIOD_NUM_MODES = 0x2,	/* number of table rows */
+	NET_DIM_CQ_PERIOD_MODE_DISABLED = 0xFF,	/* not a table row; mlx5en: DIM off */
+};
+
+/* Adaptive moderation logic: values of net_dim.state */
+enum {
+	NET_DIM_START_MEASURE,		/* next event records the start sample */
+	NET_DIM_MEASURE_IN_PROGRESS,	/* waiting for NET_DIM_NEVENTS events */
+	NET_DIM_APPLY_NEW_PROFILE,	/* work scheduled; no sampling until reset */
+};
+
+enum {					/* values of net_dim.tune_state */
+	NET_DIM_PARKING_ON_TOP,		/* stay put until the rates change */
+	NET_DIM_PARKING_TIRED,		/* stay put until tired reaches zero */
+	NET_DIM_GOING_RIGHT,		/* stepping to higher profile indexes */
+	NET_DIM_GOING_LEFT,		/* stepping to lower profile indexes */
+};
+
+enum {					/* results of net_dim_stats_compare() */
+	NET_DIM_STATS_WORSE,
+	NET_DIM_STATS_SAME,		/* no significant difference found */
+	NET_DIM_STATS_BETTER,
+};
+
+enum {					/* results of net_dim_step() */
+	NET_DIM_STEPPED,		/* a step was taken */
+	NET_DIM_TOO_TIRED,		/* tired == 2 * NET_DIM_PARAMS_NUM_PROFILES */
+	NET_DIM_ON_EDGE,		/* already at the first or last profile */
+};
+
+#define	NET_DIM_PARAMS_NUM_PROFILES 5	/* profiles per period mode */
+/* Adaptive moderation profiles */
+#define	NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
+#define	NET_DIM_DEF_PROFILE_CQE 1	/* default profile index, CQE mode */
+#define	NET_DIM_DEF_PROFILE_EQE 1	/* default profile index, EQE mode */
+
+/* Each profile table must have NET_DIM_PARAMS_NUM_PROFILES entries */
+#define	NET_DIM_EQE_PROFILES { /* {usec, pkts} per profile */ \
+	{1,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+	{8,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+	{64,  NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+	{128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+	{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+}
+
+#define	NET_DIM_CQE_PROFILES { /* {usec, pkts} per profile */ \
+	{2,  256},             \
+	{8,  128},             \
+	{16, 64},              \
+	{32, 64},              \
+	{64, 64}               \
+}
+
+static const struct net_dim_cq_moder	/* indexed [period mode][profile index] */
+	net_dim_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+	NET_DIM_EQE_PROFILES,	/* row NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE */
+	NET_DIM_CQE_PROFILES,	/* row NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE */
+};
+
+static inline struct net_dim_cq_moder	/* copy of profile ix with the mode filled in */
+net_dim_get_profile(u8 cq_period_mode,
+    int ix)
+{
+	struct net_dim_cq_moder cq_moder;
+
+	cq_moder = net_dim_profile[cq_period_mode][ix];	/* indexes are not range checked */
+	cq_moder.cq_period_mode = cq_period_mode;	/* the table leaves this field zero */
+	return cq_moder;
+}
+
+static inline struct net_dim_cq_moder	/* default profile for the given period mode */
+net_dim_get_def_profile(u8 rx_cq_period_mode)
+{
+	int default_profile_ix;
+
+	if (rx_cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE)
+		default_profile_ix = NET_DIM_DEF_PROFILE_CQE;
+	else	/* every other value gets the START_FROM_EQE default index */
+		default_profile_ix = NET_DIM_DEF_PROFILE_EQE;
+
+	return net_dim_get_profile(rx_cq_period_mode, default_profile_ix);	/* mode passed on unchecked */
+}
+
+static inline bool	/* true when the search should settle on the current profile */
+net_dim_on_top(struct net_dim *dim)
+{
+	switch (dim->tune_state) {
+	case NET_DIM_PARKING_ON_TOP:
+	case NET_DIM_PARKING_TIRED:
+		return true;	/* already parked */
+	case NET_DIM_GOING_RIGHT:	/* one right step after more than one left step */
+		return (dim->steps_left > 1) && (dim->steps_right == 1);
+	default:	/* NET_DIM_GOING_LEFT: one left step after more than one right step */
+		return (dim->steps_right > 1) && (dim->steps_left == 1);
+	}
+}
+
+static inline void	/* reverse the search direction; does nothing while parked */
+net_dim_turn(struct net_dim *dim)
+{
+	switch (dim->tune_state) {
+	case NET_DIM_PARKING_ON_TOP:
+	case NET_DIM_PARKING_TIRED:
+		break;
+	case NET_DIM_GOING_RIGHT:
+		dim->tune_state = NET_DIM_GOING_LEFT;
+		dim->steps_left = 0;	/* restart the count for the new direction */
+		break;
+	case NET_DIM_GOING_LEFT:
+		dim->tune_state = NET_DIM_GOING_RIGHT;
+		dim->steps_right = 0;	/* restart the count for the new direction */
+		break;
+	}
+}
+
+static inline int	/* NET_DIM_STEPPED, NET_DIM_TOO_TIRED or NET_DIM_ON_EDGE */
+net_dim_step(struct net_dim *dim)
+{
+	if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
+		return NET_DIM_TOO_TIRED;	/* step budget used up; dim is untouched */
+
+	switch (dim->tune_state) {
+	case NET_DIM_PARKING_ON_TOP:
+	case NET_DIM_PARKING_TIRED:
+		break;
+	case NET_DIM_GOING_RIGHT:
+		if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
+			return NET_DIM_ON_EDGE;	/* already at the last profile */
+		dim->profile_ix++;
+		dim->steps_right++;
+		break;
+	case NET_DIM_GOING_LEFT:
+		if (dim->profile_ix == 0)
+			return NET_DIM_ON_EDGE;	/* already at the first profile */
+		dim->profile_ix--;
+		dim->steps_left++;
+		break;
+	}
+
+	dim->tired++;	/* not reached on the early returns above */
+	return NET_DIM_STEPPED;
+}
+
+static inline void	/* settle on the current profile and clear all counters */
+net_dim_park_on_top(struct net_dim *dim)
+{
+	dim->steps_right = 0;
+	dim->steps_left = 0;
+	dim->tired = 0;		/* the next search starts with a full step budget */
+	dim->tune_state = NET_DIM_PARKING_ON_TOP;
+}
+
+static inline void	/* pause the search; tired is kept and counted down later */
+net_dim_park_tired(struct net_dim *dim)
+{
+	dim->steps_right = 0;
+	dim->steps_left = 0;
+	dim->tune_state = NET_DIM_PARKING_TIRED;	/* left via net_dim_decision() */
+}
+
+static inline void	/* leave a parked state and take the first step */
+net_dim_exit_parking(struct net_dim *dim)
+{
+	dim->tune_state = dim->profile_ix ? NET_DIM_GOING_LEFT :
+	NET_DIM_GOING_RIGHT;	/* only profile index 0 starts to the right */
+	net_dim_step(dim);	/* step result is ignored here */
+}
+
+#define	IS_SIGNIFICANT_DIFF(val, ref) \
+	(((100UL * abs((val) - (ref))) / (ref)) > 10)	/* differs by more than 10%
+							 * of ref; ref must be != 0 */
+
+static inline int	/* NET_DIM_STATS_* verdict for curr relative to prev */
+net_dim_stats_compare(struct net_dim_stats *curr,
+    struct net_dim_stats *prev)
+{
+	if (!prev->bpms)	/* no byte-rate baseline to divide by */
+		return curr->bpms ? NET_DIM_STATS_BETTER :
+		NET_DIM_STATS_SAME;
+
+	if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))	/* byte rate decides first */
+		return (curr->bpms > prev->bpms) ? NET_DIM_STATS_BETTER :
+		    NET_DIM_STATS_WORSE;
+
+	if (!prev->ppms)	/* no packet-rate baseline to divide by */
+		return curr->ppms ? NET_DIM_STATS_BETTER :
+		    NET_DIM_STATS_SAME;
+
+	if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))	/* then the packet rate */
+		return (curr->ppms > prev->ppms) ? NET_DIM_STATS_BETTER :
+		    NET_DIM_STATS_WORSE;
+
+	if (!prev->epms)	/* no event-rate baseline to divide by */
+		return NET_DIM_STATS_SAME;
+
+	if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))	/* fewer events is better */
+		return (curr->epms < prev->epms) ? NET_DIM_STATS_BETTER :
+		    NET_DIM_STATS_WORSE;
+
+	return NET_DIM_STATS_SAME;
+}
+
+static inline bool	/* true when dim->profile_ix was changed */
+net_dim_decision(struct net_dim_stats *curr_stats,
+    struct net_dim *dim)
+{
+	int prev_state = dim->tune_state;
+	int prev_ix = dim->profile_ix;
+	int stats_res;
+	int step_res;
+
+	switch (dim->tune_state) {
+	case NET_DIM_PARKING_ON_TOP:
+		stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
+		if (stats_res != NET_DIM_STATS_SAME)	/* better or worse: search again */
+			net_dim_exit_parking(dim);
+		break;
+
+	case NET_DIM_PARKING_TIRED:
+		dim->tired--;	/* one measurement of rest */
+		if (!dim->tired)
+			net_dim_exit_parking(dim);
+		break;
+
+	case NET_DIM_GOING_RIGHT:
+	case NET_DIM_GOING_LEFT:
+		stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
+		if (stats_res != NET_DIM_STATS_BETTER)	/* the last step did not help */
+			net_dim_turn(dim);
+
+		if (net_dim_on_top(dim)) {
+			net_dim_park_on_top(dim);
+			break;
+		}
+		step_res = net_dim_step(dim);
+		switch (step_res) {	/* NET_DIM_STEPPED needs no handling */
+		case NET_DIM_ON_EDGE:
+			net_dim_park_on_top(dim);
+			break;
+		case NET_DIM_TOO_TIRED:
+			net_dim_park_tired(dim);
+			break;
+		}
+
+		break;
+	}
+
+	if ((prev_state != NET_DIM_PARKING_ON_TOP) ||
+	    (dim->tune_state != NET_DIM_PARKING_ON_TOP))
+		dim->prev_stats = *curr_stats;	/* skipped only while staying parked on top */
+
+	return dim->profile_ix != prev_ix;
+}
+
+static inline void	/* store a timestamped snapshot of the counters in *s */
+net_dim_sample(u16 event_ctr,
+    u64 packets,
+    u64 bytes,
+    struct net_dim_sample *s)
+{
+	s->time = ktime_get();
+	s->pkt_ctr = packets;	/* u64 to u32: keeps the low 32 bits */
+	s->byte_ctr = bytes;	/* u64 to u32: keeps the low 32 bits */
+	s->event_ctr = event_ctr;
+}
+
+#define	NET_DIM_NEVENTS 64	/* events per measurement interval */
+#define	BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))	/* (end - start) masked to the low "bits" bits */
+
+static inline bool	/* false: no time elapsed and *curr_stats was not written */
+net_dim_calc_stats(struct net_dim_sample *start,
+    struct net_dim_sample *end,
+    struct net_dim_stats *curr_stats)
+{
+	/* u32 holds up to 71 minutes, should be enough */
+	u32 delta_us = ktime_us_delta(end->time, start->time);
+	u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
+	u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
+	    start->byte_ctr);
+
+	if (!delta_us)
+		return false;	/* rates are undefined for a zero interval */
+
+	curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+	curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+	curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC,
+	    delta_us);
+	return true;
+}
+
+static inline void	/* per-event entry point; takes cumulative packet/byte counters */
+net_dim(struct net_dim *dim, u64 packets, u64 bytes)
+{
+	struct net_dim_stats curr_stats;
+	struct net_dim_sample end_sample;
+	u16 nevents;
+
+	dim->event_ctr++;
+
+	switch (dim->state) {
+	case NET_DIM_MEASURE_IN_PROGRESS:
+		nevents = BIT_GAP(BITS_PER_TYPE(u16),
+		    dim->event_ctr,
+		    dim->start_sample.event_ctr);
+		if (nevents < NET_DIM_NEVENTS)
+			break;
+		net_dim_sample(dim->event_ctr, packets, bytes, &end_sample);
+		if (!net_dim_calc_stats(&dim->start_sample, &end_sample,
+		    &curr_stats))
+			break;	/* curr_stats is unset; retry on the next event */
+		if (net_dim_decision(&curr_stats, dim)) {
+			dim->state = NET_DIM_APPLY_NEW_PROFILE;
+			schedule_work(&dim->work);
+			break;
+		}
+		/* FALLTHROUGH */
+	case NET_DIM_START_MEASURE:
+		net_dim_sample(dim->event_ctr, packets, bytes, &dim->start_sample);
+		dim->state = NET_DIM_MEASURE_IN_PROGRESS;
+		break;
+	case NET_DIM_APPLY_NEW_PROFILE:	/* idle until dim->state is reset */
+	default:
+		break;
+	}
+}
+
+#endif					/* NET_DIM_H */

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Wed May  8 09:03:43 2019	(r347245)
+++ head/sys/conf/files	Wed May  8 10:23:33 2019	(r347246)
@@ -4764,6 +4764,8 @@ dev/mlx5/mlx5_core/mlx5_wq.c			optional mlx5 pci	\
 dev/mlx5/mlx5_lib/mlx5_gid.c			optional mlx5 pci	\
 	compile-with "${OFED_C}"
 
+dev/mlx5/mlx5_en/mlx5_en_dim.c			optional mlx5en pci inet inet6	\
+	compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_ethtool.c		optional mlx5en pci inet inet6	\
 	compile-with "${OFED_C}"
 dev/mlx5/mlx5_en/mlx5_en_main.c			optional mlx5en pci inet inet6	\

Modified: head/sys/dev/mlx5/mlx5_en/en.h
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/en.h	Wed May  8 09:03:43 2019	(r347245)
+++ head/sys/dev/mlx5/mlx5_en/en.h	Wed May  8 10:23:33 2019	(r347246)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2018 Mellanox Technologies. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -38,6 +38,8 @@
 #include <linux/delay.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ktime.h>
+#include <linux/net_dim.h>
 
 #include <netinet/in_systm.h>
 #include <netinet/in.h>
@@ -95,6 +97,8 @@
 #define	MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ \
     MIN(65535, MLX5E_MAX_RX_SEGS * MLX5E_MAX_RX_BYTES)
 #endif
+#define	MLX5E_DIM_DEFAULT_PROFILE 3
+#define	MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO	16
 #define	MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC      0x10
 #define	MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE	0x3
 #define	MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS      0x20
@@ -472,7 +476,7 @@ struct mlx5e_params {
   m(+1, u64 coalesce_pkts_max, "coalesce_pkts_max", "Maximum packets to join") \
   m(+1, u64 rx_coalesce_usecs, "rx_coalesce_usecs", "Limit in usec for joining rx packets") \
   m(+1, u64 rx_coalesce_pkts, "rx_coalesce_pkts", "Maximum number of rx packets to join") \
-  m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE mode 1: CQE mode") \
+  m(+1, u64 rx_coalesce_mode, "rx_coalesce_mode", "0: EQE fixed mode 1: CQE fixed mode 2: EQE auto mode 3: CQE auto mode") \
   m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \
   m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \
   m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \
@@ -562,6 +566,9 @@ struct mlx5e_rq {
 	volatile int enabled;
 	int	ix;
 
+	/* Dynamic Interrupt Moderation */
+	struct net_dim dim;
+
 	/* control */
 	struct mlx5_wq_ctrl wq_ctrl;
 	u32	rqn;
@@ -881,6 +888,9 @@ void	mlx5e_cq_error_event(struct mlx5_core_cq *mcq, in
 void	mlx5e_rx_cq_comp(struct mlx5_core_cq *);
 void	mlx5e_tx_cq_comp(struct mlx5_core_cq *);
 struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
+
+void	mlx5e_dim_work(struct work_struct *);
+void	mlx5e_dim_build_cq_param(struct mlx5e_priv *, struct mlx5e_cq_param *);
 
 int	mlx5e_open_flow_table(struct mlx5e_priv *priv);
 void	mlx5e_close_flow_table(struct mlx5e_priv *priv);

Added: head/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_dim.c	Wed May  8 10:23:33 2019	(r347246)
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "en.h"
+
+void	/* for the auto (DIM) modes, store a net_dim profile in the CQ context */
+mlx5e_dim_build_cq_param(struct mlx5e_priv *priv,
+    struct mlx5e_cq_param *param)
+{
+	struct net_dim_cq_moder prof;
+	void *cqc = param->cqc;
+
+	if (priv->params.rx_cq_moderation_mode < 2)	/* 0 and 1 are the fixed modes */
+		return;
+
+	switch (MLX5_GET(cqc, cqc, cq_period_mode)) {	/* period mode already stored in cqc */
+	case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
+		prof = net_dim_profile[NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE]
+		    [NET_DIM_DEF_PROFILE_CQE];	/* NOTE(review): index 1, not MLX5E_DIM_DEFAULT_PROFILE; no LRO cap - confirm */
+		MLX5_SET(cqc, cqc, cq_period, prof.usec);
+		MLX5_SET(cqc, cqc, cq_max_count, prof.pkts);
+		break;
+
+	case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
+		prof = net_dim_profile[NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE]
+		    [NET_DIM_DEF_PROFILE_EQE];	/* NOTE(review): same question as in the CQE case */
+		MLX5_SET(cqc, cqc, cq_period, prof.usec);
+		MLX5_SET(cqc, cqc, cq_max_count, prof.pkts);
+		break;
+	default:	/* any other period mode: leave cqc unchanged */
+		break;
+	}
+}
+
+void	/* work handler: program the RX CQ with the profile chosen by net_dim() */
+mlx5e_dim_work(struct work_struct *work)
+{
+	struct net_dim *dim = container_of(work, struct net_dim, work);
+	struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
+	struct mlx5e_channel *c = container_of(rq, struct mlx5e_channel, rq);
+	struct net_dim_cq_moder cur_profile;
+	u8 profile_ix;
+	u8 mode;
+
+	/* copy current auto moderation settings and set new state */
+	mtx_lock(&rq->mtx);
+	profile_ix = dim->profile_ix;
+	mode = dim->mode;
+	dim->state = NET_DIM_START_MEASURE;	/* net_dim() may start measuring again */
+	mtx_unlock(&rq->mtx);
+
+	/* check for invalid mode */
+	if (mode == 255)	/* 255 == NET_DIM_CQ_PERIOD_MODE_DISABLED (0xFF) */
+		return;
+
+	/* get current profile */
+	cur_profile = net_dim_profile[mode][profile_ix];	/* local copy; the table is const */
+
+	/* apply LRO restrictions */
+	if (c->priv->params.hw_lro_en &&
+	    cur_profile.pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
+		cur_profile.pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
+	}
+
+	/* modify CQ */
+	mlx5_core_modify_cq_moderation(c->priv->mdev, &rq->cq.mcq,
+	    cur_profile.usec, cur_profile.pkts);	/* return value is not checked */
+}

Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c	Wed May  8 09:03:43 2019	(r347245)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c	Wed May  8 10:23:33 2019	(r347246)
@@ -629,8 +629,8 @@ mlx5e_ethtool_handler(SYSCTL_HANDLER_ARGS)
 			mlx5e_close_locked(priv->ifp);
 
 		/* import RX coalesce mode */
-		if (priv->params_ethtool.rx_coalesce_mode != 0)
-			priv->params_ethtool.rx_coalesce_mode = 1;
+		if (priv->params_ethtool.rx_coalesce_mode > 3)
+			priv->params_ethtool.rx_coalesce_mode = 3;
 		priv->params.rx_cq_moderation_mode =
 		    priv->params_ethtool.rx_coalesce_mode;
 

Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c	Wed May  8 09:03:43 2019	(r347245)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c	Wed May  8 10:23:33 2019	(r347246)
@@ -915,6 +915,26 @@ mlx5e_create_rq(struct mlx5e_channel *c,
 #endif
 	}
 
+	INIT_WORK(&rq->dim.work, mlx5e_dim_work);
+	if (priv->params.rx_cq_moderation_mode < 2) {
+		rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
+	} else {
+		void *cqc = container_of(param,
+		    struct mlx5e_channel_param, rq)->rx_cq.cqc;
+
+		switch (MLX5_GET(cqc, cqc, cq_period_mode)) {
+		case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
+			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+			break;
+		case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
+			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
+			break;
+		default:
+			rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
+			break;
+		}
+	}
+
 	rq->ifp = c->tag.m_snd_tag.ifp;
 	rq->channel = c;
 	rq->ix = c->ix;
@@ -1116,6 +1136,7 @@ mlx5e_close_rq_wait(struct mlx5e_rq *rq)
 		rq->cq.mcq.comp(&rq->cq.mcq);
 	}
 
+	cancel_work_sync(&rq->dim.work);
 	mlx5e_disable_rq(rq);
 	mlx5e_destroy_rq(rq);
 }
@@ -1916,9 +1937,23 @@ mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 }
 
 static void
+mlx5e_get_default_profile(struct mlx5e_priv *priv, int mode, struct net_dim_cq_moder *ptr)
+{
+
+	*ptr = net_dim_get_profile(mode, MLX5E_DIM_DEFAULT_PROFILE);
+
+	/* apply LRO restrictions */
+	if (priv->params.hw_lro_en &&
+	    ptr->pkts > MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO) {
+		ptr->pkts = MLX5E_DIM_MAX_RX_CQ_MODERATION_PKTS_WITH_LRO;
+	}
+}
+
+static void
 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
     struct mlx5e_cq_param *param)
 {
+	struct net_dim_cq_moder curr;
 	void *cqc = param->cqc;
 
 
@@ -1932,21 +1967,42 @@ mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 	}
 
 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
-	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
-	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
 
 	switch (priv->params.rx_cq_moderation_mode) {
 	case 0:
+		MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
+		MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
 		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 		break;
-	default:
+	case 1:
+		MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
+		MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 		else
 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
 		break;
+	case 2:
+		mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE, &curr);
+		MLX5_SET(cqc, cqc, cq_period, curr.usec);
+		MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
+		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+		break;
+	case 3:
+		mlx5e_get_default_profile(priv, NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE, &curr);
+		MLX5_SET(cqc, cqc, cq_period, curr.usec);
+		MLX5_SET(cqc, cqc, cq_max_count, curr.pkts);
+		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
+			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+		else
+			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
+		break;
+	default:
+		break;
 	}
 
+	mlx5e_dim_build_cq_param(priv, param);
+
 	mlx5e_build_common_cq_param(priv, param);
 }
 
@@ -2037,6 +2093,7 @@ mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struc
 
 		switch (priv->params.tx_cq_moderation_mode) {
 		case 0:
+		case 2:
 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 			break;
 		default:
@@ -2061,22 +2118,49 @@ mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struc
 
 	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
 		uint8_t cq_mode;
+		uint8_t dim_mode;
 		int retval;
 
 		switch (priv->params.rx_cq_moderation_mode) {
 		case 0:
+		case 2:
 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
+			dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 			break;
 		default:
 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
+			dim_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
 			break;
 		}
 
-		retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
-		    priv->params.rx_cq_moderation_usec,
-		    priv->params.rx_cq_moderation_pkts,
-		    cq_mode);
+		/* tear down dynamic interrupt moderation */
+		mtx_lock(&rq->mtx);
+		rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_DISABLED;
+		mtx_unlock(&rq->mtx);
 
+		/* wait for dynamic interrupt moderation work task, if any */
+		cancel_work_sync(&rq->dim.work);
+
+		if (priv->params.rx_cq_moderation_mode >= 2) {
+			struct net_dim_cq_moder curr;
+
+			mlx5e_get_default_profile(priv, dim_mode, &curr);
+
+			retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
+			    curr.usec, curr.pkts, cq_mode);
+
+			/* set dynamic interrupt moderation mode and zero defaults */
+			mtx_lock(&rq->mtx);
+			rq->dim.mode = dim_mode;
+			rq->dim.state = 0;
+			rq->dim.profile_ix = MLX5E_DIM_DEFAULT_PROFILE;
+			mtx_unlock(&rq->mtx);
+		} else {
+			retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
+			    priv->params.rx_cq_moderation_usec,
+			    priv->params.rx_cq_moderation_pkts,
+			    cq_mode);
+		}
 		return (retval);
 	}
 

Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c
==============================================================================
--- head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c	Wed May  8 09:03:43 2019	(r347245)
+++ head/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c	Wed May  8 10:23:33 2019	(r347246)
@@ -585,6 +585,9 @@ mlx5e_rx_cq_comp(struct mlx5_core_cq *mcq)
 		mlx5e_post_rx_wqes(rq);
 	}
 	mlx5e_post_rx_wqes(rq);
+	/* check for dynamic interrupt moderation callback */
+	if (rq->dim.mode != NET_DIM_CQ_PERIOD_MODE_DISABLED)
+		net_dim(&rq->dim, rq->stats.packets, rq->stats.bytes);
 	mlx5e_cq_arm(&rq->cq, MLX5_GET_DOORBELL_LOCK(&rq->channel->priv->doorbell_lock));
 	tcp_lro_flush_all(&rq->lro);
 	mtx_unlock(&rq->mtx);

Modified: head/sys/modules/mlx5en/Makefile
==============================================================================
--- head/sys/modules/mlx5en/Makefile	Wed May  8 09:03:43 2019	(r347245)
+++ head/sys/modules/mlx5en/Makefile	Wed May  8 10:23:33 2019	(r347246)
@@ -3,6 +3,7 @@
 
 KMOD=mlx5en
 SRCS= \
+mlx5_en_dim.c \
 mlx5_en_ethtool.c \
 mlx5_en_main.c \
 mlx5_en_tx.c \



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201905081023.x48ANXcU038769>