From owner-svn-src-all@freebsd.org Fri Jan 20 15:45:23 2017 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 651CCCB829B; Fri, 20 Jan 2017 15:45:23 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 251991733; Fri, 20 Jan 2017 15:45:23 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v0KFjMtX001855; Fri, 20 Jan 2017 15:45:22 GMT (envelope-from hselasky@FreeBSD.org) Received: (from hselasky@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v0KFjLDw001850; Fri, 20 Jan 2017 15:45:21 GMT (envelope-from hselasky@FreeBSD.org) Message-Id: <201701201545.v0KFjLDw001850@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: hselasky set sender to hselasky@FreeBSD.org using -f From: Hans Petter Selasky Date: Fri, 20 Jan 2017 15:45:21 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r312536 - head/sys/dev/mlx5/mlx5_en X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 20 Jan 2017 15:45:23 -0000 Author: hselasky Date: Fri Jan 20 15:45:21 2017 New Revision: 312536 URL: https://svnweb.freebsd.org/changeset/base/312536 Log: Allow transmit packet bufring in software to be disabled. - Add new sysctl node to control the transmit packet bufring. - Add optimised version of the transmit routine which output packets directly to the DMA ring instead of using bufring in case the transmit lock is congested. This can reduce the number of taskswitches which in turn influence the overall system CPU usage, depending on the workload. - Add " TX" suffix to debug name for transmit mutexes to silence some witness warnings about aquiring duplicate locks having same name. MFC after: 1 week Sponsored by: Mellanox Technologies Suggested by: gallatin @ Modified: head/sys/dev/mlx5/mlx5_en/en.h head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c Modified: head/sys/dev/mlx5/mlx5_en/en.h ============================================================================== --- head/sys/dev/mlx5/mlx5_en/en.h Fri Jan 20 15:01:04 2017 (r312535) +++ head/sys/dev/mlx5/mlx5_en/en.h Fri Jan 20 15:45:21 2017 (r312536) @@ -402,6 +402,7 @@ struct mlx5e_params { m(+1, u64 tx_coalesce_usecs, "tx_coalesce_usecs", "Limit in usec for joining tx packets") \ m(+1, u64 tx_coalesce_pkts, "tx_coalesce_pkts", "Maximum number of tx packets to join") \ m(+1, u64 tx_coalesce_mode, "tx_coalesce_mode", "0: EQE mode 1: CQE mode") \ + m(+1, u64 tx_bufring_disable, "tx_bufring_disable", "0: Enable bufring 1: Disable bufring") \ m(+1, u64 tx_completion_fact, "tx_completion_fact", "1..MAX: Completion event ratio") \ m(+1, u64 tx_completion_fact_max, "tx_completion_fact_max", "Maximum completion event ratio") \ m(+1, u64 hw_lro, "hw_lro", "set to enable hw_lro") \ Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c ============================================================================== --- head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c Fri Jan 20 15:01:04 2017 (r312535) +++ head/sys/dev/mlx5/mlx5_en/mlx5_en_ethtool.c Fri Jan 20 15:45:21 2017 (r312536) @@ -352,6 +352,18 @@ mlx5e_ethtool_handler(SYSCTL_HANDLER_ARG mlx5e_open_locked(priv->ifp); break; + case MLX5_PARAM_OFFSET(tx_bufring_disable): + /* rangecheck input value */ + priv->params_ethtool.tx_bufring_disable = + priv->params_ethtool.tx_bufring_disable ? 1 : 0; + + /* reconfigure the sendqueues, if any */ + if (was_opened) { + mlx5e_close_locked(priv->ifp); + mlx5e_open_locked(priv->ifp); + } + break; + case MLX5_PARAM_OFFSET(tx_completion_fact): /* network interface must be down */ if (was_opened) Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c ============================================================================== --- head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Fri Jan 20 15:01:04 2017 (r312535) +++ head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c Fri Jan 20 15:45:21 2017 (r312536) @@ -439,7 +439,8 @@ mlx5e_update_stats_work(struct work_stru tso_packets += sq_stats->tso_packets; tso_bytes += sq_stats->tso_bytes; tx_queue_dropped += sq_stats->dropped; - tx_queue_dropped += sq_br->br_drops; + if (sq_br != NULL) + tx_queue_dropped += sq_br->br_drops; tx_defragged += sq_stats->defragged; tx_offload_none += sq_stats->csum_offload_none; } @@ -987,34 +988,37 @@ mlx5e_create_sq(struct mlx5e_channel *c, sq->priv = priv; sq->tc = tc; - sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN, - M_WAITOK, &sq->lock); - if (sq->br == NULL) { - if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n", - __func__); - err = -ENOMEM; - goto err_free_sq_db; - } - - sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK, - taskqueue_thread_enqueue, &sq->sq_tq); - if (sq->sq_tq == NULL) { - if_printf(c->ifp, "%s: Failed allocating taskqueue\n", - __func__); - err = -ENOMEM; - goto err_free_drbr; - } + /* check if we should allocate a second packet buffer */ + if (priv->params_ethtool.tx_bufring_disable == 0) { + sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN, + M_WAITOK, &sq->lock); + if (sq->br == NULL) { + if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n", + __func__); + err = -ENOMEM; + goto err_free_sq_db; + } + + sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK, + taskqueue_thread_enqueue, &sq->sq_tq); + if (sq->sq_tq == NULL) { + if_printf(c->ifp, "%s: Failed allocating taskqueue\n", + __func__); + err = -ENOMEM; + goto err_free_drbr; + } - TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq); + TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq); #ifdef RSS - cpu_id = rss_getcpu(c->ix % rss_getnumbuckets()); - CPU_SETOF(cpu_id, &cpu_mask); - taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask, - "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id); + cpu_id = rss_getcpu(c->ix % rss_getnumbuckets()); + CPU_SETOF(cpu_id, &cpu_mask); + taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask, + "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id); #else - taskqueue_start_threads(&sq->sq_tq, 1, PI_NET, - "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc); + taskqueue_start_threads(&sq->sq_tq, 1, PI_NET, + "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc); #endif + } snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc); mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM, @@ -1047,9 +1051,12 @@ mlx5e_destroy_sq(struct mlx5e_sq *sq) mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar); - taskqueue_drain(sq->sq_tq, &sq->sq_task); - taskqueue_free(sq->sq_tq); - buf_ring_free(sq->br, M_MLX5EN); + if (sq->sq_tq != NULL) { + taskqueue_drain(sq->sq_tq, &sq->sq_task); + taskqueue_free(sq->sq_tq); + } + if (sq->br != NULL) + buf_ring_free(sq->br, M_MLX5EN); } int @@ -1497,9 +1504,10 @@ mlx5e_chan_mtx_init(struct mlx5e_channel for (tc = 0; tc < c->num_tc; tc++) { struct mlx5e_sq *sq = c->sq + tc; - mtx_init(&sq->lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF); - mtx_init(&sq->comp_lock, "mlx5comp", MTX_NETWORK_LOCK, - MTX_DEF); + mtx_init(&sq->lock, "mlx5tx", + MTX_NETWORK_LOCK " TX", MTX_DEF); + mtx_init(&sq->comp_lock, "mlx5comp", + MTX_NETWORK_LOCK " TX", MTX_DEF); callout_init_mtx(&sq->cev_callout, &sq->lock, 0); Modified: head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c ============================================================================== --- head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c Fri Jan 20 15:01:04 2017 (r312535) +++ head/sys/dev/mlx5/mlx5_en/mlx5_en_tx.c Fri Jan 20 15:45:21 2017 (r312536) @@ -439,7 +439,8 @@ mlx5e_poll_tx_cq(struct mlx5e_sq *sq, in sq->cc = sqcc; - if (atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY)) + if (sq->sq_tq != NULL && + atomic_cmpset_int(&sq->queue_state, MLX5E_SQ_FULL, MLX5E_SQ_READY)) taskqueue_enqueue(sq->sq_tq, &sq->sq_task); } @@ -498,6 +499,45 @@ mlx5e_xmit_locked(struct ifnet *ifp, str return (err); } +static int +mlx5e_xmit_locked_no_br(struct ifnet *ifp, struct mlx5e_sq *sq, struct mbuf *mb) +{ + int err = 0; + + if (unlikely((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + sq->stopped != 0)) { + m_freem(mb); + return (ENETDOWN); + } + + /* Do transmit */ + if (mlx5e_sq_xmit(sq, &mb) != 0) { + /* NOTE: m_freem() is NULL safe */ + m_freem(mb); + err = ENOBUFS; + } + + /* Check if we need to write the doorbell */ + if (likely(sq->doorbell.d64 != 0)) { + mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); + sq->doorbell.d64 = 0; + } + + /* + * Check if we need to start the event timer which flushes the + * transmit ring on timeout: + */ + if (unlikely(sq->cev_next_state == MLX5E_CEV_STATE_INITIAL && + sq->cev_factor != 1)) { + /* start the timer */ + mlx5e_sq_cev_timeout(sq); + } else { + /* don't send NOPs yet */ + sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; + } + return (err); +} + int mlx5e_xmit(struct ifnet *ifp, struct mbuf *mb) { @@ -510,7 +550,13 @@ mlx5e_xmit(struct ifnet *ifp, struct mbu m_freem(mb); return (ENXIO); } - if (mtx_trylock(&sq->lock)) { + + if (unlikely(sq->br == NULL)) { + /* rate limited traffic */ + mtx_lock(&sq->lock); + ret = mlx5e_xmit_locked_no_br(ifp, sq, mb); + mtx_unlock(&sq->lock); + } else if (mtx_trylock(&sq->lock)) { ret = mlx5e_xmit_locked(ifp, sq, mb); mtx_unlock(&sq->lock); } else {