Date: Thu, 20 Sep 2012 01:23:54 +0000 (UTC) From: Scott Long <scottl@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r240718 - stable/9/sys/dev/ixgbe Message-ID: <201209200123.q8K1NsPV013979@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: scottl Date: Thu Sep 20 01:23:54 2012 New Revision: 240718 URL: http://svn.freebsd.org/changeset/base/240718 Log: Sync the ixgbe driver from HEAD to stable/9 r236627 - Fix driver deadlock due to OACTIVE flag r236729 - Fix prefetch programming typo r239940 - Improve small RX packet performance r240155 - Fix missing braces in PHY configuration r240366 - Remove a prefetch directive that hurts performance Approved by: jfv Obtained from: Netflix, inc. Modified: stable/9/sys/dev/ixgbe/ixgbe.c stable/9/sys/dev/ixgbe/ixgbe.h stable/9/sys/dev/ixgbe/ixgbe_osdep.h Directory Properties: stable/9/sys/dev/ixgbe/ (props changed) Modified: stable/9/sys/dev/ixgbe/ixgbe.c ============================================================================== --- stable/9/sys/dev/ixgbe/ixgbe.c Thu Sep 20 00:51:09 2012 (r240717) +++ stable/9/sys/dev/ixgbe/ixgbe.c Thu Sep 20 01:23:54 2012 (r240718) @@ -1145,7 +1145,7 @@ ixgbe_init_locked(struct adapter *adapte * from the Intel linux driver 3.8.21. * Prefetching enables tx line rate even with 1 queue. */ - txdctl |= (16 << 0) | (1 << 8); + txdctl |= (32 << 0) | (1 << 8); IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl); } @@ -1390,7 +1390,7 @@ ixgbe_handle_que(void *context, int pend ixgbe_start_locked(txr, ifp); #endif IXGBE_TX_UNLOCK(txr); - if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) { + if (more) { taskqueue_enqueue(que->tq, &que->que_task); return; } @@ -3698,21 +3698,30 @@ no_split: mp = rxbuf->m_pack; mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; - /* Get the memory mapping */ - error = bus_dmamap_load_mbuf_sg(rxr->ptag, - rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); - if (error != 0) { - printf("Refresh mbufs: payload dmamap load" - " failure - %d\n", error); - m_free(mp); - rxbuf->m_pack = NULL; - goto update; + + /* If we're dealing with an mbuf that was copied rather + * than replaced, there's no need to go through busdma. + */ + if ((rxbuf->flags & IXGBE_RX_COPY) == 0) { + /* Get the memory mapping */ + error = bus_dmamap_load_mbuf_sg(rxr->ptag, + rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); + if (error != 0) { + printf("Refresh mbufs: payload dmamap load" + " failure - %d\n", error); + m_free(mp); + rxbuf->m_pack = NULL; + goto update; + } + rxbuf->m_pack = mp; + bus_dmamap_sync(rxr->ptag, rxbuf->pmap, + BUS_DMASYNC_PREREAD); + rxbuf->paddr = rxr->rx_base[i].read.pkt_addr = + htole64(pseg[0].ds_addr); + } else { + rxr->rx_base[i].read.pkt_addr = rxbuf->paddr; + rxbuf->flags &= ~IXGBE_RX_COPY; } - rxbuf->m_pack = mp; - bus_dmamap_sync(rxr->ptag, rxbuf->pmap, - BUS_DMASYNC_PREREAD); - rxr->rx_base[i].read.pkt_addr = - htole64(pseg[0].ds_addr); refreshed = TRUE; /* Next is precalculated */ @@ -4025,6 +4034,7 @@ skip_head: rxr->next_to_refresh = 0; rxr->lro_enabled = FALSE; rxr->rx_split_packets = 0; + rxr->rx_copies = 0; rxr->rx_bytes = 0; rxr->discard = FALSE; rxr->vtag_strip = FALSE; @@ -4580,14 +4590,36 @@ ixgbe_rxeof(struct ix_queue *que, int co ** that determines what we are */ sendmp = rbuf->fmp; - rbuf->m_pack = rbuf->fmp = NULL; if (sendmp != NULL) { /* secondary frag */ + rbuf->m_pack = rbuf->fmp = NULL; mp->m_flags &= ~M_PKTHDR; sendmp->m_pkthdr.len += mp->m_len; } else { + /* + * Optimize. This might be a small packet, + * maybe just a TCP ACK. Do a fast copy that + * is cache aligned into a new mbuf, and + * leave the old mbuf+cluster for re-use. + */ + if (eop && plen <= IXGBE_RX_COPY_LEN) { + sendmp = m_gethdr(M_DONTWAIT, MT_DATA); + if (sendmp != NULL) { + sendmp->m_data += + IXGBE_RX_COPY_ALIGN; + ixgbe_bcopy(mp->m_data, + sendmp->m_data, plen); + sendmp->m_len = plen; + rxr->rx_copies++; + rbuf->flags |= IXGBE_RX_COPY; + } + } + if (sendmp == NULL) { + rbuf->m_pack = rbuf->fmp = NULL; + sendmp = mp; + } + /* first desc of a non-ps chain */ - sendmp = mp; sendmp->m_flags |= M_PKTHDR; sendmp->m_pkthdr.len = mp->m_len; if (staterr & IXGBE_RXD_STAT_VP) { @@ -5438,6 +5470,9 @@ ixgbe_add_hw_stats(struct adapter *adapt SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); + SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", + CTLFLAG_RD, &rxr->rx_copies, + "Copied RX Frames"); SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); Modified: stable/9/sys/dev/ixgbe/ixgbe.h ============================================================================== --- stable/9/sys/dev/ixgbe/ixgbe.h Thu Sep 20 00:51:09 2012 (r240717) +++ stable/9/sys/dev/ixgbe/ixgbe.h Thu Sep 20 01:23:54 2012 (r240718) @@ -154,6 +154,19 @@ #define IXGBE_FC_HI 0x20000 #define IXGBE_FC_LO 0x10000 +/* + * Used for optimizing small rx mbufs. Effort is made to keep the copy + * small and aligned for the CPU L1 cache. + * + * MHLEN is typically 168 bytes, giving us 8-byte alignment. Getting + * 32 byte alignment needed for the fast bcopy results in 8 bytes being + * wasted. Getting 64 byte alignment, which _should_ be ideal for + * modern Intel CPUs, results in 40 bytes wasted and a significant drop + * in observed efficiency of the optimization, 97.9% -> 81.8%. + */ +#define IXGBE_RX_COPY_LEN 160 +#define IXGBE_RX_COPY_ALIGN (MHLEN - IXGBE_RX_COPY_LEN) + /* Keep older OS drivers building... */ #if !defined(SYSCTL_ADD_UQUAD) #define SYSCTL_ADD_UQUAD SYSCTL_ADD_QUAD @@ -245,6 +258,9 @@ struct ixgbe_rx_buf { struct mbuf *fmp; bus_dmamap_t hmap; bus_dmamap_t pmap; + u_int flags; +#define IXGBE_RX_COPY 0x01 + uint64_t paddr; }; /* @@ -339,6 +355,7 @@ struct rx_ring { /* Soft stats */ u64 rx_irq; u64 rx_split_packets; + u64 rx_copies; u64 rx_packets; u64 rx_bytes; u64 rx_discarded; Modified: stable/9/sys/dev/ixgbe/ixgbe_osdep.h ============================================================================== --- stable/9/sys/dev/ixgbe/ixgbe_osdep.h Thu Sep 20 00:51:09 2012 (r240717) +++ stable/9/sys/dev/ixgbe/ixgbe_osdep.h Thu Sep 20 01:23:54 2012 (r240718) @@ -143,6 +143,25 @@ void prefetch(void *x) #define prefetch(x) #endif +/* + * Optimized bcopy thanks to Luigi Rizzo's investigative work. Assumes + * non-overlapping regions and 32-byte padding on both src and dst. + */ +static __inline int +ixgbe_bcopy(void *_src, void *_dst, int l) +{ + uint64_t *src = _src; + uint64_t *dst = _dst; + + for (; l > 0; l -= 32) { + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + } + return (0); +} + struct ixgbe_osdep { bus_space_tag_t mem_bus_space_tag;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201209200123.q8K1NsPV013979>