Date: Fri, 20 Jan 2012 13:10:41 +0000 (UTC)
From: Luigi Rizzo <luigi@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r230378 - in user/luigi/netmap/sys/dev: ixgbe netmap
Message-ID: <201201201310.q0KDAfEb042963@svn.freebsd.org>
Author: luigi
Date: Fri Jan 20 13:10:40 2012
New Revision: 230378
URL: http://svn.freebsd.org/changeset/base/230378

Log:
  snapshot of current code:
  - implement more aggressive interrupt throttling on the tx queues
  - pass rx interrupt mitigation up to the reading process
  - make interrupt_rate a writable sysctl parameter
  - correct the value used for computations of interrupt rate
    (it was off by a factor of 2; a worked example of the arithmetic
    follows the diff below)

Modified:
  user/luigi/netmap/sys/dev/ixgbe/ixgbe.c
  user/luigi/netmap/sys/dev/netmap/ixgbe_netmap.h
  user/luigi/netmap/sys/dev/netmap/netmap.c
  user/luigi/netmap/sys/dev/netmap/netmap_kern.h

Modified: user/luigi/netmap/sys/dev/ixgbe/ixgbe.c
==============================================================================
--- user/luigi/netmap/sys/dev/ixgbe/ixgbe.c	Fri Jan 20 12:59:12 2012	(r230377)
+++ user/luigi/netmap/sys/dev/ixgbe/ixgbe.c	Fri Jan 20 13:10:40 2012	(r230378)
@@ -229,10 +229,10 @@ MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
 ** is varied over time based on the
 ** traffic for that interrupt vector
 */
-static int ixgbe_enable_aim = TRUE;
+static int ixgbe_enable_aim = 0; // TRUE;
 TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
 
-static int ixgbe_max_interrupt_rate = (8000000 / IXGBE_LOW_LATENCY);
+static int ixgbe_max_interrupt_rate = (8000000 / 250); // IXGBE_LOW_LATENCY);
 TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
 
 /* How many packets rxeof tries to clean at a time */
@@ -3385,7 +3385,11 @@ ixgbe_txeof(struct tx_ring *txr)
 #ifdef DEV_NETMAP
 	if (ifp->if_capenable & IFCAP_NETMAP) {
 		struct netmap_adapter *na = NA(ifp);
+		struct netmap_kring *kring = &na->tx_rings[txr->me];
+		tx_desc = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
+		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
+		    BUS_DMASYNC_POSTREAD);
 
 		/*
 		 * In netmap mode, all the work is done in the context
 		 * of the client thread. Interrupt handlers only wake up
@@ -3395,12 +3399,17 @@ ixgbe_txeof(struct tx_ring *txr)
 		 * release and re-acquire txlock to avoid deadlocks.
 		 * XXX see if we can find a better way.
 		 */
-		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
-		IXGBE_TX_UNLOCK(txr);
-		IXGBE_CORE_LOCK(adapter);
-		selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
-		IXGBE_CORE_UNLOCK(adapter);
-		IXGBE_TX_LOCK(txr);
+		if (!netmap_mitigate ||
+		    (kring->nr_kflags < kring->nkr_num_slots &&
+		    tx_desc[kring->nr_kflags].upper.fields.status & IXGBE_TXD_STAT_DD)) {
+			kring->nr_kflags = kring->nkr_num_slots; // invalidate
+			selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
+			IXGBE_TX_UNLOCK(txr);
+			IXGBE_CORE_LOCK(adapter);
+			selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
+			IXGBE_CORE_UNLOCK(adapter);
+			IXGBE_TX_LOCK(txr);
+		}
 		return FALSE;
 	}
 #endif /* DEV_NETMAP */
@@ -4302,6 +4311,7 @@ ixgbe_rxeof(struct ix_queue *que, int co
 		 */
 		struct netmap_adapter *na = NA(ifp);
 
+		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
 		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
 		IXGBE_RX_UNLOCK(rxr);
 		IXGBE_CORE_LOCK(adapter);
@@ -4830,7 +4840,7 @@ ixgbe_configure_ivars(struct adapter *ad
 	u32 newitr;
 
 	if (ixgbe_max_interrupt_rate > 0)
-		newitr = (8000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
+		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
 	else
 		newitr = 0;
 
@@ -5193,12 +5203,21 @@ ixgbe_sysctl_interrupt_rate_handler(SYSC
 	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
 	usec = ((reg & 0x0FF8) >> 3);
 	if (usec > 0)
-		rate = 1000000 / usec;
+		rate = 500000 / usec;
 	else
 		rate = 0;
 	error = sysctl_handle_int(oidp, &rate, 0, req);
 	if (error || !req->newptr)
 		return error;
+	reg &= ~0xfff; /* default, no limitation */
+	ixgbe_max_interrupt_rate = 0;
+	if (rate > 0 && rate < 500000) {
+		if (rate < 1000)
+			rate = 1000;
+		ixgbe_max_interrupt_rate = rate;
+		reg |= ((4000000/rate) & 0xff8 );
+	}
+	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
 	return 0;
 }
 
@@ -5252,10 +5271,13 @@ ixgbe_add_hw_stats(struct adapter *adapt
 		queue_list = SYSCTL_CHILDREN(queue_node);
 
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
-				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
+				CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
 				sizeof(&adapter->queues[i]),
 				ixgbe_sysctl_interrupt_rate_handler, "IU",
 				"Interrupt Rate");
+		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
+				CTLFLAG_RD, &(adapter->queues[i].irqs),
+				"irqs on this queue");
 		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
 				CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
 				ixgbe_sysctl_tdh_handler, "IU",

Modified: user/luigi/netmap/sys/dev/netmap/ixgbe_netmap.h
==============================================================================
--- user/luigi/netmap/sys/dev/netmap/ixgbe_netmap.h	Fri Jan 20 12:59:12 2012	(r230377)
+++ user/luigi/netmap/sys/dev/netmap/ixgbe_netmap.h	Fri Jan 20 13:10:40 2012	(r230378)
@@ -191,6 +191,10 @@ fail:
  * (this is also true for every use of ring in the kernel).
  *
  * ring->avail is never used, only checked for bogus values.
+ *
+ * If do_lock is set, it means the function has been called from the ioctl
+ * handler: in this particular case, do_lock has also the special meaning of
+ * force the update of NIC registers
  */
 static int
 ixgbe_netmap_txsync(void *a, u_int ring_nr, int do_lock)
@@ -292,10 +296,11 @@ ring_reset:
 			 * need this.
 			 */
 			curr->read.buffer_addr = htole64(paddr);
-			curr->read.olinfo_status = 0;
+			curr->read.olinfo_status = htole32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
 			curr->read.cmd_type_len =
 			    htole32(txr->txd_cmd | len |
 				(IXGBE_ADVTXD_DTYP_DATA |
+				    IXGBE_ADVTXD_DCMD_DEXT | // XXX
 				    IXGBE_ADVTXD_DCMD_IFCS |
 				    IXGBE_TXD_CMD_EOP | flags) );
 			/* If the buffer has changed, unload and reload map
@@ -336,7 +341,29 @@ ring_reset:
 	 * (meaning that probably the caller really wanted to check
 	 * for completed transmissions).
 	 */
-	if (n == 0 || kring->nr_hwavail < 1) {
+	if (do_lock) {
+		kring->nr_kflags = kring->nkr_num_slots; // filter interrupts
+		j = 1; // force read
+	} else if (kring->nr_hwavail > 0) { // no need to block
+		kring->nr_kflags = kring->nkr_num_slots; // filter interrupts
+		j = 0;
+	} else {
+		struct ixgbe_legacy_tx_desc *txd = (struct ixgbe_legacy_tx_desc *)txr->tx_base;
+
+		// wake me up every half ring (more or less)
+		j = txr->next_to_clean + kring->nkr_num_slots/2;
+		if (j >= kring->nkr_num_slots)
+			j -= kring->nkr_num_slots;
+		// round to the closest with dd set
+		j= (j < kring->nkr_num_slots / 4 || j >= kring->nkr_num_slots*3/4) ?
+			0 : report_frequency;
+		kring->nr_kflags = j; // remember where to look at in the interrupt
+		// now check if we have data ready
+		j = txd[j].upper.fields.status & IXGBE_TXD_STAT_DD;
+	}
+	if (!j) {
+		netmap_skip_txsync++;
+	} else {
 		int delta;
 
 		/*
@@ -362,6 +389,7 @@ ring_reset:
 		/* some tx completed, increment avail */
 		if (delta < 0)
 			delta += kring->nkr_num_slots;
+		netmap_delta[ring_nr] = (netmap_delta[ring_nr] * 15 + delta)/16;
 		txr->next_to_clean = l;
 		kring->nr_hwavail += delta;
 		if (kring->nr_hwavail > lim)
@@ -391,6 +419,8 @@ ring_reset:
  * We must subtract the newly consumed slots (cur - nr_hwcur)
  * from nr_hwavail, make the descriptors available for the next reads,
  * and set kring->nr_hwcur = ring->cur and ring->avail = kring->nr_hwavail.
+ *
+ * do_lock has a special meaning: please refer to txsync.
  */
 static int
 ixgbe_netmap_rxsync(void *a, u_int ring_nr, int do_lock)
@@ -401,6 +431,7 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
 	struct netmap_ring *ring = kring->ring;
 	int j, k, l, n, lim = kring->nkr_num_slots - 1;
+	int force_update = do_lock || kring->nr_kflags & NKR_PENDINTR;
 
 	k = ring->cur;	/* cache and check value, same as in txsync */
 	n = k - kring->nr_hwcur;
@@ -437,12 +468,14 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
 	if (j > lim)
 		j -= lim + 1;
 
+	if (force_update) {
 	for (n = 0; ; n++) {
 		union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
 		uint32_t staterr = le32toh(curr->wb.upper.status_error);
 
 		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
 			break;
+		// XXX add -4 if crcstrip
 		ring->slot[j].len = le16toh(curr->wb.upper.length);
 		bus_dmamap_sync(rxr->ptag,
 			rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
@@ -453,6 +486,8 @@ ixgbe_netmap_rxsync(void *a, u_int ring_
 		rxr->next_to_check = l;
 		kring->nr_hwavail += n;
 	}
+	kring->nr_kflags &= ~NKR_PENDINTR;
+	}
 
 	/*
 	 * Skip past packets that userspace has already processed

Modified: user/luigi/netmap/sys/dev/netmap/netmap.c
==============================================================================
--- user/luigi/netmap/sys/dev/netmap/netmap.c	Fri Jan 20 12:59:12 2012	(r230377)
+++ user/luigi/netmap/sys/dev/netmap/netmap.c	Fri Jan 20 13:10:40 2012	(r230378)
@@ -146,6 +146,17 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, total_
     CTLFLAG_RD, &nm_buf_pool.total_buffers, 0, "total_buffers");
 SYSCTL_INT(_dev_netmap, OID_AUTO, free_buffers,
     CTLFLAG_RD, &nm_buf_pool.free, 0, "free_buffers");
+int netmap_mitigate = 1;
+SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
+int netmap_skip_txsync;
+SYSCTL_INT(_dev_netmap, OID_AUTO, skip_txsync, CTLFLAG_RW, &netmap_skip_txsync, 0, "");
+int netmap_skip_rxsync;
+SYSCTL_INT(_dev_netmap, OID_AUTO, skip_rxsync, CTLFLAG_RW, &netmap_skip_rxsync, 0, "");
+int netmap_delta[8];
+SYSCTL_INT(_dev_netmap, OID_AUTO, delta0, CTLFLAG_RW, &netmap_delta[0], 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, delta1, CTLFLAG_RW, &netmap_delta[1], 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, delta2, CTLFLAG_RW, &netmap_delta[2], 0, "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, delta3, CTLFLAG_RW, &netmap_delta[3], 0, "");
 
 /*
  * Allocate n buffers from the ring, and fill the slot.

Modified: user/luigi/netmap/sys/dev/netmap/netmap_kern.h
==============================================================================
--- user/luigi/netmap/sys/dev/netmap/netmap_kern.h	Fri Jan 20 12:59:12 2012	(r230377)
+++ user/luigi/netmap/sys/dev/netmap/netmap_kern.h	Fri Jan 20 13:10:40 2012	(r230378)
@@ -65,7 +65,8 @@ struct netmap_kring {
 	struct netmap_ring *ring;
 	u_int nr_hwcur;
 	int nr_hwavail;
-	u_int nr_kflags;
+	u_int nr_kflags;	/* private driver flags */
+#define NKR_PENDINTR	0x1	// Pending interrupt.
 	u_int nkr_num_slots;
 
 	int	nkr_hwofs;	/* offset between NIC and netmap ring */
@@ -171,6 +172,9 @@ struct netmap_slot *netmap_reset(struct
 	enum txrx tx, int n, u_int new_cur);
 int netmap_ring_reinit(struct netmap_kring *);
 
+extern int netmap_mitigate;
+extern int netmap_skip_txsync, netmap_skip_rxsync;
+extern int netmap_delta[8];
 extern u_int netmap_total_buffers;
 extern char *netmap_buffer_base;
 extern int netmap_verbose;	// XXX debugging
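
A note on the interrupt rate correction mentioned in the log: the EITR
interval field sits in bits 3..11 of the register (hence the 0x0FF8 mask)
and, as far as I can tell, counts in 2us units, which is why 1000000 and
8000000 in the old code become 500000 and 4000000 above. The following
standalone userland sketch is not part of the commit; it just replays the
conversion in both directions under that 2us assumption (function names
rate_to_eitr/eitr_to_rate are mine, not driver symbols):

    /* standalone sketch, not driver code: EITR value <-> interrupt rate */
    #include <stdio.h>
    #include <stdint.h>

    /* desired per-queue interrupt rate (irq/s) -> EITR register value */
    static uint32_t
    rate_to_eitr(int rate)
    {
    	if (rate <= 0 || rate >= 500000)
    		return 0;	/* 0 means no throttling */
    	/* interval = 1000000/rate us = 500000/rate units of 2us;
    	 * the field lives in bits 3..11, so the value is already <<3 */
    	return (4000000 / rate) & 0x0FF8;
    }

    /* EITR register value -> interrupt rate (irq/s) */
    static int
    eitr_to_rate(uint32_t reg)
    {
    	uint32_t units = (reg & 0x0FF8) >> 3;	/* interval in 2us units */

    	return units ? 500000 / units : 0;
    }

    int
    main(void)
    {
    	int rate = 10000;	/* example: cap a queue at 10000 irq/s */
    	uint32_t reg = rate_to_eitr(rate);

    	/* prints: rate 10000 -> EITR 0x190 -> rate 10000 */
    	printf("rate %d -> EITR 0x%x -> rate %d\n",
    	    rate, (unsigned)reg, eitr_to_rate(reg));
    	return 0;
    }

With the handler above switched to CTLFLAG_RW, the same conversion should be
reachable at runtime through something like
'sysctl dev.ix.0.queue0.interrupt_rate=10000' (the exact oid path depends on
the unit and queue number), while the netmap-side knobs appear as
dev.netmap.mitigate, dev.netmap.skip_txsync and friends.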