Date: Thu, 2 Jun 2016 11:14:26 +0000 (UTC) From: =?UTF-8?Q?Roger_Pau_Monn=c3=a9?= <royger@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r301197 - head/sys/dev/xen/netfront Message-ID: <201606021114.u52BEQqB047172@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: royger Date: Thu Jun 2 11:14:26 2016 New Revision: 301197 URL: https://svnweb.freebsd.org/changeset/base/301197 Log: xen-netfront: always keep the Rx ring full of requests This is based on Linux commit 1f3c2eba1e2d866ef99bb9b10ade4096e3d7607c from David Vrabel: A full Rx ring only requires 1 MiB of memory. This is not enough memory that it is useful to dynamically scale the number of Rx requests in the ring based on traffic rates, because: a) Even the full 1 MiB is a tiny fraction of a typically modern Linux VM (for example, the AWS micro instance still has 1 GiB of memory). b) Netfront would have used up to 1 MiB already even with moderate data rates (there was no adjustment of target based on memory pressure). c) Small VMs are going to typically have one VCPU and hence only one queue. Keeping the ring full of Rx requests handles bursty traffic better than trying to converge on an optimal number of requests to keep filled. Reviewed by: Wei Liu <wei.liu2@citrix.com> Sponsored by: Citrix Systems R&D Modified: head/sys/dev/xen/netfront/netfront.c Modified: head/sys/dev/xen/netfront/netfront.c ============================================================================== --- head/sys/dev/xen/netfront/netfront.c Thu Jun 2 11:12:11 2016 (r301196) +++ head/sys/dev/xen/netfront/netfront.c Thu Jun 2 11:14:26 2016 (r301197) @@ -77,6 +77,8 @@ __FBSDID("$FreeBSD$"); #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) +#define NET_RX_SLOTS_MIN (XEN_NETIF_NR_SLOTS_MIN + 1) + /* * Should the driver do LRO on the RX end * this can be toggled on the fly, but the @@ -114,6 +116,7 @@ struct netfront_rx_info; static void xn_txeof(struct netfront_txq *); static void xn_rxeof(struct netfront_rxq *); static void xn_alloc_rx_buffers(struct netfront_rxq *); +static void xn_alloc_rx_buffers_callout(void *arg); static void xn_release_rx_bufs(struct netfront_rxq *); static void xn_release_tx_bufs(struct netfront_txq *); @@ -182,16 +185,14 @@ struct netfront_rxq { grant_ref_t grant_ref[NET_TX_RING_SIZE + 1]; struct mbuf *mbufs[NET_RX_RING_SIZE + 1]; - struct mbufq batch; /* batch queue */ - int target; - - xen_pfn_t pfn_array[NET_RX_RING_SIZE]; struct lro_ctrl lro; struct taskqueue *tq; struct task intrtask; + struct callout rx_refill; + struct xn_rx_stats stats; }; @@ -233,12 +234,6 @@ struct netfront_info { u_int carrier; u_int maxfrags; - /* Receive-ring batched refills. */ -#define RX_MIN_TARGET 32 -#define RX_MAX_TARGET NET_RX_RING_SIZE - int rx_min_target; - int rx_max_target; - device_t xbdev; uint8_t mac[ETHER_ADDR_LEN]; @@ -687,6 +682,7 @@ static void destroy_rxq(struct netfront_rxq *rxq) { + callout_drain(&rxq->rx_refill); free(rxq->ring.sring, M_DEVBUF); taskqueue_drain_all(rxq->tq); taskqueue_free(rxq->tq); @@ -721,7 +717,6 @@ setup_rxqs(device_t dev, struct netfront rxq->id = q; rxq->info = info; - rxq->target = RX_MIN_TARGET; rxq->ring_ref = GRANT_REF_INVALID; rxq->ring.sring = NULL; snprintf(rxq->name, XN_QUEUE_NAME_LEN, "xnrx_%u", q); @@ -733,11 +728,9 @@ setup_rxqs(device_t dev, struct netfront rxq->grant_ref[i] = GRANT_REF_INVALID; } - mbufq_init(&rxq->batch, INT_MAX); - /* Start resources allocation */ - if (gnttab_alloc_grant_references(RX_MAX_TARGET, + if (gnttab_alloc_grant_references(NET_RX_RING_SIZE, &rxq->gref_head) != 0) { device_printf(dev, "allocating rx gref"); error = ENOMEM; @@ -760,6 +753,8 @@ setup_rxqs(device_t dev, struct netfront rxq->tq = taskqueue_create_fast(rxq->name, M_WAITOK, taskqueue_thread_enqueue, &rxq->tq); + callout_init(&rxq->rx_refill, 1); + error = taskqueue_start_threads(&rxq->tq, 1, PI_NET, "%s rxq %d", device_get_nameunit(dev), rxq->id); if (error != 0) { @@ -1058,119 +1053,88 @@ xn_release_tx_bufs(struct netfront_txq * } } -static void -xn_alloc_rx_buffers(struct netfront_rxq *rxq) +static struct mbuf * +xn_alloc_one_rx_buffer(struct netfront_rxq *rxq) { - struct netfront_info *np = rxq->info; - int otherend_id = xenbus_get_otherend_id(np->xbdev); - unsigned short id; - struct mbuf *m_new; - int i, batch_target, notify; - RING_IDX req_prod; - grant_ref_t ref; - netif_rx_request_t *req; - vm_offset_t vaddr; - u_long pfn; + struct mbuf *m; - req_prod = rxq->ring.req_prod_pvt; + m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); + if (m == NULL) + return NULL; + m->m_len = m->m_pkthdr.len = MJUMPAGESIZE; - if (__predict_false(np->carrier == 0)) - return; + return (m); +} - /* - * Allocate mbufs greedily, even though we batch updates to the - * receive ring. This creates a less bursty demand on the memory - * allocator, and so should reduce the chance of failed allocation - * requests both for ourself and for other kernel subsystems. - * - * Here we attempt to maintain rx_target buffers in flight, counting - * buffers that we have yet to process in the receive ring. - */ - batch_target = rxq->target - (req_prod - rxq->ring.rsp_cons); - for (i = mbufq_len(&rxq->batch); i < batch_target; i++) { - m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUMPAGESIZE); - if (m_new == NULL) { - if (i != 0) - goto refill; - /* XXX set timer */ - break; - } - m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; +static void +xn_alloc_rx_buffers(struct netfront_rxq *rxq) +{ + RING_IDX req_prod; + int notify; - /* queue the mbufs allocated */ - mbufq_enqueue(&rxq->batch, m_new); - } + XN_RX_LOCK_ASSERT(rxq); - /* - * If we've allocated at least half of our target number of entries, - * submit them to the backend - we have enough to make the overhead - * of submission worthwhile. Otherwise wait for more mbufs and - * request entries to become available. - */ - if (i < (rxq->target/2)) { - if (req_prod > rxq->ring.sring->req_prod) - goto push; + if (__predict_false(rxq->info->carrier == 0)) return; - } - /* - * Double floating fill target if we risked having the backend - * run out of empty buffers for receive traffic. We define "running - * low" as having less than a fourth of our target buffers free - * at the time we refilled the queue. - */ - if ((req_prod - rxq->ring.sring->rsp_prod) < (rxq->target / 4)) { - rxq->target *= 2; - if (rxq->target > np->rx_max_target) - rxq->target = np->rx_max_target; - } + for (req_prod = rxq->ring.req_prod_pvt; + req_prod - rxq->ring.rsp_cons < NET_RX_RING_SIZE; + req_prod++) { + struct mbuf *m; + unsigned short id; + grant_ref_t ref; + struct netif_rx_request *req; + unsigned long pfn; -refill: - for (i = 0; ; i++) { - if ((m_new = mbufq_dequeue(&rxq->batch)) == NULL) + m = xn_alloc_one_rx_buffer(rxq); + if (m == NULL) break; - m_new->m_ext.ext_arg1 = (vm_paddr_t *)(uintptr_t)( - vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); - - id = xn_rxidx(req_prod + i); + id = xn_rxidx(req_prod); KASSERT(rxq->mbufs[id] == NULL, ("non-NULL xn_rx_chain")); - rxq->mbufs[id] = m_new; + rxq->mbufs[id] = m; ref = gnttab_claim_grant_reference(&rxq->gref_head); KASSERT(ref != GNTTAB_LIST_END, - ("reserved grant references exhuasted")); + ("reserved grant references exhuasted")); rxq->grant_ref[id] = ref; - vaddr = mtod(m_new, vm_offset_t); - pfn = vtophys(vaddr) >> PAGE_SHIFT; - req = RING_GET_REQUEST(&rxq->ring, req_prod + i); + pfn = atop(vtophys(mtod(m, vm_offset_t))); + req = RING_GET_REQUEST(&rxq->ring, req_prod); - gnttab_grant_foreign_access_ref(ref, otherend_id, pfn, 0); + gnttab_grant_foreign_access_ref(ref, + xenbus_get_otherend_id(rxq->info->xbdev), pfn, 0); req->id = id; req->gref = ref; + } - rxq->pfn_array[i] = - vtophys(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; + rxq->ring.req_prod_pvt = req_prod; + + /* Not enough requests? Try again later. */ + if (req_prod - rxq->ring.rsp_cons < NET_RX_SLOTS_MIN) { + callout_reset(&rxq->rx_refill, hz/10, xn_alloc_rx_buffers_callout, + rxq); + return; } - KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ - KASSERT(mbufq_len(&rxq->batch) == 0, ("not all mbufs processed")); - /* - * We may have allocated buffers which have entries outstanding - * in the page * update queue -- make sure we flush those first! - */ - wmb(); + wmb(); /* barrier so backend seens requests */ - /* Above is a suitable barrier to ensure backend will see requests. */ - rxq->ring.req_prod_pvt = req_prod + i; -push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rxq->ring, notify); if (notify) xen_intr_signal(rxq->xen_intr_handle); } +static void xn_alloc_rx_buffers_callout(void *arg) +{ + struct netfront_rxq *rxq; + + rxq = (struct netfront_rxq *)arg; + XN_RX_LOCK(rxq); + xn_alloc_rx_buffers(rxq); + XN_RX_UNLOCK(rxq); +} + static void xn_release_rx_bufs(struct netfront_rxq *rxq) { @@ -1792,8 +1756,12 @@ xn_ifinit_locked(struct netfront_info *n for (i = 0; i < np->num_queues; i++) { rxq = &np->rxq[i]; + XN_RX_LOCK(rxq); xn_alloc_rx_buffers(rxq); rxq->ring.sring->rsp_event = rxq->ring.rsp_cons + 1; + if (RING_HAS_UNCONSUMED_RESPONSES(&rxq->ring)) + taskqueue_enqueue(rxq->tq, &rxq->intrtask); + XN_RX_UNLOCK(rxq); } ifp->if_drv_flags |= IFF_DRV_RUNNING; @@ -2009,7 +1977,9 @@ xn_connect(struct netfront_info *np) XN_TX_LOCK(txq); xn_txeof(txq); XN_TX_UNLOCK(txq); + XN_RX_LOCK(rxq); xn_alloc_rx_buffers(rxq); + XN_RX_UNLOCK(rxq); } return (0); @@ -2225,9 +2195,6 @@ create_netdev(device_t dev) ifmedia_add(&np->sc_media, IFM_ETHER|IFM_MANUAL, 0, NULL); ifmedia_set(&np->sc_media, IFM_ETHER|IFM_MANUAL); - np->rx_min_target = RX_MIN_TARGET; - np->rx_max_target = RX_MAX_TARGET; - err = xen_net_read_mac(dev, np->mac); if (err != 0) goto error;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201606021114.u52BEQqB047172>