From owner-svn-src-head@freebsd.org Thu Nov 9 11:48:23 2017 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 9F476E726C0; Thu, 9 Nov 2017 11:48:23 +0000 (UTC) (envelope-from mw@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 611D73068; Thu, 9 Nov 2017 11:48:23 +0000 (UTC) (envelope-from mw@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id vA9BmMUg045666; Thu, 9 Nov 2017 11:48:22 GMT (envelope-from mw@FreeBSD.org) Received: (from mw@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id vA9BmMCl045663; Thu, 9 Nov 2017 11:48:22 GMT (envelope-from mw@FreeBSD.org) Message-Id: <201711091148.vA9BmMCl045663@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mw set sender to mw@FreeBSD.org using -f From: Marcin Wojtas Date: Thu, 9 Nov 2017 11:48:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r325576 - head/sys/dev/ena X-SVN-Group: head X-SVN-Commit-Author: mw X-SVN-Commit-Paths: head/sys/dev/ena X-SVN-Commit-Revision: 325576 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 09 Nov 2017 11:48:23 -0000 Author: mw Date: Thu Nov 9 11:48:22 2017 New Revision: 325576 URL: https://svnweb.freebsd.org/changeset/base/325576 Log: Check for Rx ring state to prevent from stall in the ENA 
driver In the case where the Rx ring is full and the driver fails to allocate Rx mbufs, the ring could stall. Keep-alive checks the Rx ring state every second and, if the ring is full for two consecutive cycles, triggers the rx_cleanup routine in another thread. Submitted by: Michal Krawczyk Reviewed by: byenduri_gmail.com Obtained from: Semihalf Sponsored by: Amazon, Inc. Differential Revision: https://reviews.freebsd.org/D12856 Modified: head/sys/dev/ena/ena.c head/sys/dev/ena/ena.h head/sys/dev/ena/ena_sysctl.c Modified: head/sys/dev/ena/ena.c ============================================================================== --- head/sys/dev/ena/ena.c Thu Nov 9 11:47:47 2017 (r325575) +++ head/sys/dev/ena/ena.c Thu Nov 9 11:48:22 2017 (r325576) @@ -122,6 +122,7 @@ static void ena_destroy_all_rx_queues(struct ena_adapt static void ena_destroy_all_io_queues(struct ena_adapter *); static int ena_create_io_queues(struct ena_adapter *); static int ena_tx_cleanup(struct ena_ring *); +static void ena_deferred_rx_cleanup(void *, int); static int ena_rx_cleanup(struct ena_ring *); static inline int validate_tx_req_id(struct ena_ring *, uint16_t); static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, @@ -471,6 +472,7 @@ ena_init_io_rings(struct ena_adapter *adapter) device_get_nameunit(adapter->pdev), i); mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF); + mtx_init(&rxr->ring_mtx, rxr->mtx_name, NULL, MTX_DEF); que = &adapter->que[i]; que->adapter = adapter; @@ -480,6 +482,8 @@ ena_init_io_rings(struct ena_adapter *adapter) txr->que = que; rxr->que = que; + + rxr->empty_rx_queue = 0; } } @@ -495,6 +499,7 @@ ena_free_io_ring_resources(struct ena_adapter *adapter sizeof(rxr->rx_stats)); mtx_destroy(&txr->ring_mtx); + mtx_destroy(&rxr->ring_mtx); drbr_free(txr->br, M_DEVBUF); @@ -847,6 +852,22 @@ ena_setup_rx_resources(struct ena_adapter *adapter, un } } + /* Allocate taskqueues */ + TASK_INIT(&rx_ring->cmpl_task, 0, ena_deferred_rx_cleanup, rx_ring); + rx_ring->cmpl_tq 
= taskqueue_create_fast("ena RX completion", M_WAITOK, + taskqueue_thread_enqueue, &rx_ring->cmpl_tq); + + /* RSS set cpu for thread */ +#ifdef RSS + CPU_SETOF(que->cpu, &cpu_mask); + taskqueue_start_threads_cpuset(&rx_ring->cmpl_tq, 1, PI_NET, &cpu_mask, + "%s rx_ring cmpl (bucket %d)", + device_get_nameunit(adapter->pdev), que->cpu); +#else + taskqueue_start_threads(&rx_ring->cmpl_tq, 1, PI_NET, + "%s rx_ring cmpl %d", device_get_nameunit(adapter->pdev), que->cpu); +#endif + return (0); err_rx_dma: @@ -877,6 +898,11 @@ ena_free_rx_resources(struct ena_adapter *adapter, uns ena_trace(ENA_INFO, "%s qid %d\n", __func__, qid); + while (taskqueue_cancel(rx_ring->cmpl_tq, &rx_ring->cmpl_task, NULL) != 0) + taskqueue_drain(rx_ring->cmpl_tq, &rx_ring->cmpl_task); + + taskqueue_free(rx_ring->cmpl_tq); + /* Free buffer DMA maps, */ for (int i = 0; i < rx_ring->ring_size; i++) { m_freem(rx_ring->rx_buffer_info[i].mbuf); @@ -1565,6 +1591,24 @@ ena_rx_checksum(struct ena_ring *rx_ring, struct ena_c return; } +static void +ena_deferred_rx_cleanup(void *arg, int pending) +{ + struct ena_ring *rx_ring = arg; + int budget = CLEAN_BUDGET; + + ENA_RING_MTX_LOCK(rx_ring); + /* + * If deferred task was executed, perform cleanup of all awaiting + * descs (or until given budget is depleted to avoid infinite loop). + */ + while (budget--) { + if (ena_rx_cleanup(rx_ring) == 0) + break; + } + ENA_RING_MTX_UNLOCK(rx_ring); +} + /** * ena_rx_cleanup - handle rx irq * @arg: ring for which irq is being handled @@ -1736,7 +1780,17 @@ ena_handle_msix(void *arg) io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; for (i = 0; i < CLEAN_BUDGET; ++i) { - rxc = ena_rx_cleanup(rx_ring); + /* + * If lock cannot be acquired, then deferred cleanup task was + * being executed and rx ring is being cleaned up in + * another thread. 
+ */ + if (ENA_RING_MTX_TRYLOCK(rx_ring)) { + rxc = ena_rx_cleanup(rx_ring); + ENA_RING_MTX_UNLOCK(rx_ring); + } else { + rxc = 0; + } /* Protection from calling ena_tx_cleanup from ena_start_xmit */ ENA_RING_MTX_LOCK(tx_ring); @@ -3374,7 +3428,54 @@ static void check_for_missing_tx_completions(struct en adapter->next_monitored_tx_qid = i % adapter->num_queues; } +/* trigger deferred rx cleanup after 2 consecutive detections */ +#define EMPTY_RX_REFILL 2 +/* For the rare case where the device runs out of Rx descriptors and the + * msix handler failed to refill new Rx descriptors (due to a lack of memory + * for example). + * This case will lead to a deadlock: + * The device won't send interrupts since all the new Rx packets will be dropped + * The msix handler won't allocate new Rx descriptors so the device won't be + * able to send new packets. + * + * When such a situation is detected - execute rx cleanup task in another thread + */ +static void +check_for_empty_rx_ring(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, refill_required; + if (!adapter->up) + return; + + if (adapter->trigger_reset) + return; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + + refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + + if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { + counter_u64_add(rx_ring->rx_stats.empty_rx_ring, + 1); + + device_printf(adapter->pdev, + "trigger refill for ring %d\n", i); + + taskqueue_enqueue(rx_ring->cmpl_tq, + &rx_ring->cmpl_task); + rx_ring->empty_rx_queue = 0; + } + } else { + rx_ring->empty_rx_queue = 0; + } + } +} + static void ena_timer_service(void *data) { @@ -3387,6 +3488,8 @@ ena_timer_service(void *data) check_for_admin_com_state(adapter); check_for_missing_tx_completions(adapter); + + check_for_empty_rx_ring(adapter); if (host_info) ena_update_host_info(host_info, adapter->ifp); 
Modified: head/sys/dev/ena/ena.h ============================================================================== --- head/sys/dev/ena/ena.h Thu Nov 9 11:47:47 2017 (r325575) +++ head/sys/dev/ena/ena.h Thu Nov 9 11:48:22 2017 (r325576) @@ -284,15 +284,23 @@ struct ena_ring { struct buf_ring *br; /* only for TX */ struct mtx ring_mtx; char mtx_name[16]; - struct task enqueue_task; - struct taskqueue *enqueue_tq; - struct task cmpl_task; - struct taskqueue *cmpl_tq; + union { + struct { + struct task enqueue_task; + struct taskqueue *enqueue_tq; + }; + struct { + struct task cmpl_task; + struct taskqueue *cmpl_tq; + }; + }; union { struct ena_stats_tx tx_stats; struct ena_stats_rx rx_stats; }; + + int empty_rx_queue; } __aligned(CACHE_LINE_SIZE); Modified: head/sys/dev/ena/ena_sysctl.c ============================================================================== --- head/sys/dev/ena/ena_sysctl.c Thu Nov 9 11:47:47 2017 (r325575) +++ head/sys/dev/ena/ena_sysctl.c Thu Nov 9 11:48:22 2017 (r325576) @@ -200,6 +200,9 @@ ena_sysctl_add_stats(struct ena_adapter *adapter) SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id", CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count"); + SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, + "empty_rx_ring", CTLFLAG_RD, + &rx_stats->empty_rx_ring, "RX descriptors depletion count"); } /* Stats read from device */