Date: Thu, 9 Nov 2017 11:48:22 +0000 (UTC) From: Marcin Wojtas <mw@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r325576 - head/sys/dev/ena Message-ID: <201711091148.vA9BmMCl045663@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mw Date: Thu Nov 9 11:48:22 2017 New Revision: 325576 URL: https://svnweb.freebsd.org/changeset/base/325576 Log: Check for Rx ring state to prevent from stall in the ENA driver In case when Rx ring is full and driver will fail to allocate Rx mbufs, the ring could be stalled. Keep alive is checking every second for Rx ring state, and if it is full for two cycles, then trigger rx_cleanup routine in another thread. Submitted by: Michal Krawczyk <mk@semihalf.com> Reviewed by: byenduri_gmail.com Obtained from: Semihalf Sponsored by: Amazon, Inc. Differential Revision: https://reviews.freebsd.org/D12856 Modified: head/sys/dev/ena/ena.c head/sys/dev/ena/ena.h head/sys/dev/ena/ena_sysctl.c Modified: head/sys/dev/ena/ena.c ============================================================================== --- head/sys/dev/ena/ena.c Thu Nov 9 11:47:47 2017 (r325575) +++ head/sys/dev/ena/ena.c Thu Nov 9 11:48:22 2017 (r325576) @@ -122,6 +122,7 @@ static void ena_destroy_all_rx_queues(struct ena_adapt static void ena_destroy_all_io_queues(struct ena_adapter *); static int ena_create_io_queues(struct ena_adapter *); static int ena_tx_cleanup(struct ena_ring *); +static void ena_deferred_rx_cleanup(void *, int); static int ena_rx_cleanup(struct ena_ring *); static inline int validate_tx_req_id(struct ena_ring *, uint16_t); static void ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *, @@ -471,6 +472,7 @@ ena_init_io_rings(struct ena_adapter *adapter) device_get_nameunit(adapter->pdev), i); mtx_init(&txr->ring_mtx, txr->mtx_name, NULL, MTX_DEF); + mtx_init(&rxr->ring_mtx, rxr->mtx_name, NULL, MTX_DEF); que = &adapter->que[i]; que->adapter = adapter; @@ -480,6 +482,8 @@ ena_init_io_rings(struct ena_adapter *adapter) txr->que = que; rxr->que = que; + + rxr->empty_rx_queue = 0; } } @@ -495,6 +499,7 @@ ena_free_io_ring_resources(struct ena_adapter *adapter sizeof(rxr->rx_stats)); mtx_destroy(&txr->ring_mtx); + mtx_destroy(&rxr->ring_mtx); drbr_free(txr->br, M_DEVBUF); @@ -847,6 +852,22 @@ ena_setup_rx_resources(struct ena_adapter *adapter, un } } + /* Allocate taskqueues */ + TASK_INIT(&rx_ring->cmpl_task, 0, ena_deferred_rx_cleanup, rx_ring); + rx_ring->cmpl_tq = taskqueue_create_fast("ena RX completion", M_WAITOK, + taskqueue_thread_enqueue, &rx_ring->cmpl_tq); + + /* RSS set cpu for thread */ +#ifdef RSS + CPU_SETOF(que->cpu, &cpu_mask); + taskqueue_start_threads_cpuset(&rx_ring->cmpl_tq, 1, PI_NET, &cpu_mask, + "%s rx_ring cmpl (bucket %d)", + device_get_nameunit(adapter->pdev), que->cpu); +#else + taskqueue_start_threads(&rx_ring->cmpl_tq, 1, PI_NET, + "%s rx_ring cmpl %d", device_get_nameunit(adapter->pdev), que->cpu); +#endif + return (0); err_rx_dma: @@ -877,6 +898,11 @@ ena_free_rx_resources(struct ena_adapter *adapter, uns ena_trace(ENA_INFO, "%s qid %d\n", __func__, qid); + while (taskqueue_cancel(rx_ring->cmpl_tq, &rx_ring->cmpl_task, NULL) != 0) + taskqueue_drain(rx_ring->cmpl_tq, &rx_ring->cmpl_task); + + taskqueue_free(rx_ring->cmpl_tq); + /* Free buffer DMA maps, */ for (int i = 0; i < rx_ring->ring_size; i++) { m_freem(rx_ring->rx_buffer_info[i].mbuf); @@ -1565,6 +1591,24 @@ ena_rx_checksum(struct ena_ring *rx_ring, struct ena_c return; } +static void +ena_deferred_rx_cleanup(void *arg, int pending) +{ + struct ena_ring *rx_ring = arg; + int budget = CLEAN_BUDGET; + + ENA_RING_MTX_LOCK(rx_ring); + /* + * If deferred task was executed, perform cleanup of all awaiting + * descs (or until given budget is depleted to avoid infinite loop). + */ + while (budget--) { + if (ena_rx_cleanup(rx_ring) == 0) + break; + } + ENA_RING_MTX_UNLOCK(rx_ring); +} + /** * ena_rx_cleanup - handle rx irq * @arg: ring for which irq is being handled @@ -1736,7 +1780,17 @@ ena_handle_msix(void *arg) io_cq = &adapter->ena_dev->io_cq_queues[ena_qid]; for (i = 0; i < CLEAN_BUDGET; ++i) { - rxc = ena_rx_cleanup(rx_ring); + /* + * If lock cannot be acquired, then deferred cleanup task was + * being executed and rx ring is being cleaned up in + * another thread. + */ + if (ENA_RING_MTX_TRYLOCK(rx_ring)) { + rxc = ena_rx_cleanup(rx_ring); + ENA_RING_MTX_UNLOCK(rx_ring); + } else { + rxc = 0; + } /* Protection from calling ena_tx_cleanup from ena_start_xmit */ ENA_RING_MTX_LOCK(tx_ring); @@ -3374,7 +3428,54 @@ static void check_for_missing_tx_completions(struct en adapter->next_monitored_tx_qid = i % adapter->num_queues; } +/* trigger deferred rx cleanup after 2 consecutive detections */ +#define EMPTY_RX_REFILL 2 +/* For the rare case where the device runs out of Rx descriptors and the + * msix handler failed to refill new Rx descriptors (due to a lack of memory + * for example). + * This case will lead to a deadlock: + * The device won't send interrupts since all the new Rx packets will be dropped + * The msix handler won't allocate new Rx descriptors so the device won't be + * able to send new packets. + * + * When such a situation is detected - execute rx cleanup task in another thread + */ +static void +check_for_empty_rx_ring(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, refill_required; + if (!adapter->up) + return; + + if (adapter->trigger_reset) + return; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + + refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq); + if (unlikely(refill_required == (rx_ring->ring_size - 1))) { + rx_ring->empty_rx_queue++; + + if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) { + counter_u64_add(rx_ring->rx_stats.empty_rx_ring, + 1); + + device_printf(adapter->pdev, + "trigger refill for ring %d\n", i); + + taskqueue_enqueue(rx_ring->cmpl_tq, + &rx_ring->cmpl_task); + rx_ring->empty_rx_queue = 0; + } + } else { + rx_ring->empty_rx_queue = 0; + } + } +} + static void ena_timer_service(void *data) { @@ -3387,6 +3488,8 @@ ena_timer_service(void *data) check_for_admin_com_state(adapter); check_for_missing_tx_completions(adapter); + + check_for_empty_rx_ring(adapter); if (host_info) ena_update_host_info(host_info, adapter->ifp); Modified: head/sys/dev/ena/ena.h ============================================================================== --- head/sys/dev/ena/ena.h Thu Nov 9 11:47:47 2017 (r325575) +++ head/sys/dev/ena/ena.h Thu Nov 9 11:48:22 2017 (r325576) @@ -284,15 +284,23 @@ struct ena_ring { struct buf_ring *br; /* only for TX */ struct mtx ring_mtx; char mtx_name[16]; - struct task enqueue_task; - struct taskqueue *enqueue_tq; - struct task cmpl_task; - struct taskqueue *cmpl_tq; + union { + struct { + struct task enqueue_task; + struct taskqueue *enqueue_tq; + }; + struct { + struct task cmpl_task; + struct taskqueue *cmpl_tq; + }; + }; union { struct ena_stats_tx tx_stats; struct ena_stats_rx rx_stats; }; + + int empty_rx_queue; } __aligned(CACHE_LINE_SIZE); Modified: head/sys/dev/ena/ena_sysctl.c ============================================================================== --- head/sys/dev/ena/ena_sysctl.c Thu Nov 9 11:47:47 2017 (r325575) +++ head/sys/dev/ena/ena_sysctl.c Thu Nov 9 11:48:22 2017 (r325576) @@ -200,6 +200,9 @@ ena_sysctl_add_stats(struct ena_adapter *adapter) SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id", CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count"); + SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, + "empty_rx_ring", CTLFLAG_RD, + &rx_stats->empty_rx_ring, "RX descriptors depletion count"); } /* Stats read from device */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201711091148.vA9BmMCl045663>