From owner-svn-src-user@FreeBSD.ORG Wed Nov 5 21:44:21 2008 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 3A32C106564A; Wed, 5 Nov 2008 21:44:21 +0000 (UTC) (envelope-from gallatin@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 27E778FC20; Wed, 5 Nov 2008 21:44:21 +0000 (UTC) (envelope-from gallatin@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id mA5LiLr6037974; Wed, 5 Nov 2008 21:44:21 GMT (envelope-from gallatin@svn.freebsd.org) Received: (from gallatin@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id mA5LiLqd037972; Wed, 5 Nov 2008 21:44:21 GMT (envelope-from gallatin@svn.freebsd.org) Message-Id: <200811052144.mA5LiLqd037972@svn.freebsd.org> From: Andrew Gallatin Date: Wed, 5 Nov 2008 21:44:21 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r184702 - user/kmacy/HEAD_multi_tx/sys/dev/mxge X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 05 Nov 2008 21:44:21 -0000 Author: gallatin Date: Wed Nov 5 21:44:20 2008 New Revision: 184702 URL: http://svn.freebsd.org/changeset/base/184702 Log: A start at multi-queue transmit for mxge. Modified: user/kmacy/HEAD_multi_tx/sys/dev/mxge/if_mxge.c user/kmacy/HEAD_multi_tx/sys/dev/mxge/if_mxge_var.h Modified: user/kmacy/HEAD_multi_tx/sys/dev/mxge/if_mxge.c ============================================================================== --- user/kmacy/HEAD_multi_tx/sys/dev/mxge/if_mxge.c Wed Nov 5 20:32:13 2008 (r184701) +++ user/kmacy/HEAD_multi_tx/sys/dev/mxge/if_mxge.c Wed Nov 5 21:44:20 2008 (r184702) @@ -95,7 +95,7 @@ static int mxge_flow_control = 1; static int mxge_verbose = 0; static int mxge_lro_cnt = 8; static int mxge_ticks; -static int mxge_max_slices = 8; +static int mxge_max_slices = 1; static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; static int mxge_always_promisc = 0; static char *mxge_fw_unaligned = "mxge_ethp_z8e"; @@ -1206,7 +1206,9 @@ mxge_reset(mxge_softc_t *sc, int interru * to setting up the interrupt queue DMA */ cmd.data0 = sc->num_slices; - cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; + cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE | + MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; + status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, &cmd); if (status != 0) { @@ -1266,6 +1268,7 @@ mxge_reset(mxge_softc_t *sc, int interru ss->tx.req = 0; ss->tx.done = 0; ss->tx.pkt_done = 0; + ss->tx.queue_active = 0; ss->tx.wake = 0; ss->tx.defrag = 0; ss->tx.stall = 0; @@ -1611,10 +1614,6 @@ mxge_add_sysctls(mxge_softc_t *sc) 0, "number of frames appended to lro merge" "queues"); - /* only transmit from slice 0 for now */ - if (slice > 0) - continue; - SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_done", CTLFLAG_RD, &ss->tx.done, @@ -1857,12 +1856,18 @@ mxge_encap_tso(struct mxge_slice_state * tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; mxge_submit_req(tx, tx->req_list, cnt); + if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { + /* tell the NIC to start polling this slice */ + *tx->send_go = 1; + tx->queue_active = 1; + mb(); + } return; drop: bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); m_freem(m); - ss->sc->ifp->if_oerrors++; + ss->oerrors++; if (!once) { printf("tx->max_desc exceeded via TSO!\n"); printf("mss = %d, %ld, %d!\n", mss, @@ -2059,11 +2064,17 @@ mxge_encap(struct mxge_slice_state *ss, #endif tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; mxge_submit_req(tx, tx->req_list, cnt); + if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { + /* tell the NIC to start polling this slice */ + *tx->send_go = 1; + tx->queue_active = 1; + mb(); + } return; drop: m_freem(m); - ifp->if_oerrors++; + ss->oerrors++; return; } @@ -2076,13 +2087,15 @@ mxge_start_locked(struct mxge_slice_stat mxge_softc_t *sc; struct mbuf *m; struct ifnet *ifp; + struct ifaltq *ifq; mxge_tx_ring_t *tx; sc = ss->sc; ifp = sc->ifp; tx = &ss->tx; + ifq = &tx->ifq; while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { - IFQ_DRV_DEQUEUE(&ifp->if_snd, m); + IFQ_DRV_DEQUEUE(ifq, m); if (m == NULL) { return; } @@ -2093,25 +2106,46 @@ mxge_start_locked(struct mxge_slice_stat mxge_encap(ss, m); } /* ran out of transmit slots */ - if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { - sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; + if ((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) { + ss->if_drv_flags |= IFF_DRV_OACTIVE; tx->stall++; } } static void -mxge_start(struct ifnet *ifp) +mxge_start(struct mxge_slice_state *ss) { - mxge_softc_t *sc = ifp->if_softc; - struct mxge_slice_state *ss; - - /* only use the first slice for now */ - ss = &sc->ss[0]; mtx_lock(&ss->tx.mtx); mxge_start_locked(ss); mtx_unlock(&ss->tx.mtx); } +static int +mxge_start_mbuf(struct ifnet *ifp, struct mbuf *m) +{ + struct ifaltq *ifq; + mxge_softc_t *sc = ifp->if_softc; + struct mxge_slice_state *ss; + int slice, error, len; + short mflags; + + slice = m->m_pkthdr.rss_hash & (sc->num_slices - 1); +/* printf("%d & %d = %d\n", m->m_pkthdr.rss_hash, (sc->num_slices - 1), slice);*/ + ss = &sc->ss[slice]; + ifq = &ss->tx.ifq; + len = (m)->m_pkthdr.len; + mflags = (m)->m_flags; + IFQ_ENQUEUE(ifq, m, error); + if (error == 0) { + ss->obytes += len; + if (mflags & M_MCAST) + ss->omcasts++; + if ((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) + mxge_start(ss); + } + return (error); +} + /* * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy * at most 32 bytes at a time, so as to avoid involving the software @@ -2349,6 +2383,7 @@ mxge_rx_done_big(struct mxge_slice_state m->m_data += MXGEFW_PAD; m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.rss_hash = ss - sc->ss; m->m_len = m->m_pkthdr.len = len; ss->ipackets++; eh = mtod(m, struct ether_header *); @@ -2410,6 +2445,7 @@ mxge_rx_done_small(struct mxge_slice_sta m->m_pkthdr.rcvif = ifp; m->m_len = m->m_pkthdr.len = len; + m->m_pkthdr.rss_hash = ss - sc->ss; ss->ipackets++; eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { @@ -2480,7 +2516,7 @@ mxge_tx_done(struct mxge_slice_state *ss /* mbuf and DMA map only attached to the first segment per-mbuf */ if (m != NULL) { - ifp->if_opackets++; + ss->opackets++; tx->info[idx].m = NULL; map = tx->info[idx].map; bus_dmamap_unload(tx->dmat, map); @@ -2495,14 +2531,23 @@ mxge_tx_done(struct mxge_slice_state *ss /* If we have space, clear IFF_OACTIVE to tell the stack that its OK to send packets */ - if (ifp->if_drv_flags & IFF_DRV_OACTIVE && + if (ss->if_drv_flags & IFF_DRV_OACTIVE && tx->req - tx->done < (tx->mask + 1)/4) { mtx_lock(&ss->tx.mtx); - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + ss->if_drv_flags &= ~IFF_DRV_OACTIVE; ss->tx.wake++; mxge_start_locked(ss); mtx_unlock(&ss->tx.mtx); } + if ((ss->sc->num_slices > 1) && (tx->req == tx->done) && + mtx_trylock(&ss->tx.mtx)) { + /* let the NIC stop polling this queue, since there + * are no more transmits pending */ + *tx->send_stop = 1; + tx->queue_active = 0; + mb(); + mtx_unlock(&ss->tx.mtx); + } } static struct mxge_media_type mxge_media_types[] = @@ -2653,14 +2698,6 @@ mxge_intr(void *arg) uint8_t valid; - /* an interrupt on a non-zero slice is implicitly valid - since MSI-X irqs are not shared */ - if (ss != sc->ss) { - mxge_clean_rx_done(ss); - *ss->irq_claim = be32toh(3); - return; - } - /* make sure the DMA has finished */ if (!stats->valid) { return; @@ -2683,7 +2720,8 @@ mxge_intr(void *arg) send_done_count = be32toh(stats->send_done_count); while ((send_done_count != tx->pkt_done) || (rx_done->entry[rx_done->idx].length != 0)) { - mxge_tx_done(ss, (int)send_done_count); + if (send_done_count != tx->pkt_done) + mxge_tx_done(ss, (int)send_done_count); mxge_clean_rx_done(ss); send_done_count = be32toh(stats->send_done_count); } @@ -2691,7 +2729,8 @@ mxge_intr(void *arg) mb(); } while (*((volatile uint8_t *) &stats->valid)); - if (__predict_false(stats->stats_updated)) { + /* fw stats meaningful only on the first slice */ + if (__predict_false((ss == sc->ss) && stats->stats_updated)) { if (sc->link_state != stats->link_up) { sc->link_state = stats->link_up; if (sc->link_state) { @@ -2981,10 +3020,6 @@ mxge_alloc_slice_rings(struct mxge_slice /* now allocate TX resouces */ - /* only use a single TX ring for now */ - if (ss != ss->sc->ss) - return 0; - ss->tx.mask = tx_ring_entries - 1; ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); @@ -3043,8 +3078,11 @@ mxge_alloc_slice_rings(struct mxge_slice return err;; } } + IFQ_SET_MAXLEN(&ss->tx.ifq, tx_ring_entries - 1); + ss->tx.ifq.ifq_drv_maxlen = ss->tx.ifq.ifq_maxlen; + IFQ_SET_READY(&ss->tx.ifq); + return 0; - } static int @@ -3149,13 +3187,16 @@ mxge_slice_open(struct mxge_slice_state /* get the lanai pointers to the send and receive rings */ err = 0; - /* We currently only send from the first slice */ - if (slice == 0) { - cmd.data0 = slice; - err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); - ss->tx.lanai = - (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); - } + + cmd.data0 = slice; + err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); + ss->tx.lanai = + (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); + ss->tx.send_go = (volatile uint32_t *) + (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); + ss->tx.send_stop = (volatile uint32_t *) + (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); + cmd.data0 = slice; err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); @@ -3276,10 +3317,16 @@ mxge_open(mxge_softc_t *sc) } /* Now give him the pointer to the stats block */ - cmd.data0 = MXGE_LOWPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); - cmd.data1 = MXGE_HIGHPART_TO_U32(sc->ss->fw_stats_dma.bus_addr); - cmd.data2 = sizeof(struct mcp_irq_data); - err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); + for (slice = 0; slice < sc->num_slices; slice++) { + struct mxge_slice_state *ss = &sc->ss[slice]; + cmd.data0 = + MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); + cmd.data1 = + MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); + cmd.data2 = sizeof(struct mcp_irq_data); + cmd.data2 |= (slice << 16); + err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); + } if (err != 0) { bus = sc->ss->fw_stats_dma.bus_addr; @@ -3494,15 +3541,27 @@ mxge_update_stats(mxge_softc_t *sc) { struct mxge_slice_state *ss; u_long ipackets = 0; + u_long opackets = 0; + u_long obytes = 0; + u_long omcasts = 0; + u_long oerrors = 0; int slice; - for(slice = 0; slice < sc->num_slices; slice++) { + for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; ipackets += ss->ipackets; + opackets += ss->opackets; + obytes += ss->obytes; + omcasts += ss->omcasts; + oerrors += ss->oerrors; } sc->ifp->if_ipackets = ipackets; - + sc->ifp->if_opackets = opackets; + sc->ifp->if_obytes = obytes; + sc->ifp->if_omcasts = omcasts; + sc->ifp->if_oerrors = oerrors; } + static void mxge_tick(void *arg) { @@ -3725,6 +3784,7 @@ mxge_free_slices(mxge_softc_t *sc) mxge_dma_free(&ss->fw_stats_dma); ss->fw_stats = NULL; mtx_destroy(&ss->tx.mtx); + mtx_destroy(&ss->tx.ifq.ifq_mtx); } if (ss->rx_done.entry != NULL) { mxge_dma_free(&ss->rx_done.dma); @@ -3770,12 +3830,8 @@ mxge_alloc_slices(mxge_softc_t *sc) bzero(ss->rx_done.entry, bytes); /* - * allocate the per-slice firmware stats; stats - * (including tx) are used used only on the first - * slice for now + * allocate the per-slice firmware stats */ - if (i > 0) - continue; bytes = sizeof (*ss->fw_stats); err = mxge_dma_alloc(sc, &ss->fw_stats_dma, @@ -3786,6 +3842,9 @@ mxge_alloc_slices(mxge_softc_t *sc) snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), "%s:tx(%d)", device_get_nameunit(sc->dev), i); mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); + snprintf(ss->tx.ifq_mtx_name, sizeof(ss->tx.mtx_name), + "%s:ifp(%d)", device_get_nameunit(sc->dev), i); + mtx_init(&ss->tx.ifq.ifq_mtx, ss->tx.ifq_mtx_name, NULL, MTX_DEF); } return (0); @@ -4247,7 +4306,7 @@ mxge_attach(device_t dev) ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = mxge_ioctl; - ifp->if_start = mxge_start; +/* ifp->if_start = mxge_start;*/ /* Initialise the ifmedia structure */ ifmedia_init(&sc->media, 0, mxge_media_change, mxge_media_status); @@ -4257,6 +4316,7 @@ mxge_attach(device_t dev) /* ether_ifattach sets mtu to 1500 */ if (ifp->if_capabilities & IFCAP_JUMBO_MTU) ifp->if_mtu = 9000; + ifp->if_start_mbuf = mxge_start_mbuf; mxge_add_sysctls(sc); return 0; Modified: user/kmacy/HEAD_multi_tx/sys/dev/mxge/if_mxge_var.h ============================================================================== --- user/kmacy/HEAD_multi_tx/sys/dev/mxge/if_mxge_var.h Wed Nov 5 20:32:13 2008 (r184701) +++ user/kmacy/HEAD_multi_tx/sys/dev/mxge/if_mxge_var.h Wed Nov 5 21:44:20 2008 (r184702) @@ -126,6 +126,8 @@ typedef struct { struct mtx mtx; volatile mcp_kreq_ether_send_t *lanai; /* lanai ptr for sendq */ + volatile uint32_t *send_go; /* doorbell for sendq */ + volatile uint32_t *send_stop; /* doorbell for sendq */ mcp_kreq_ether_send_t *req_list; /* host shadow of sendq */ char *req_bytes; bus_dma_segment_t *seg_list; @@ -136,13 +138,16 @@ typedef struct int done; /* transmits completed */ int pkt_done; /* packets completed */ int max_desc; /* max descriptors per xmit */ + int queue_active; /* fw currently polling this queue*/ int stall; /* #times hw queue exhausted */ int wake; /* #times irq re-enabled xmit */ int watchdog_req; /* cache of req */ int watchdog_done; /* cache of done */ int watchdog_rx_pause; /* cache of pause rq recvd */ int defrag; + struct ifaltq ifq; char mtx_name[16]; + char ifq_mtx_name[16]; } mxge_tx_ring_t; struct lro_entry; @@ -182,6 +187,11 @@ struct mxge_slice_state { mcp_irq_data_t *fw_stats; volatile uint32_t *irq_claim; u_long ipackets; + u_long opackets; + u_long obytes; + u_long omcasts; + u_long oerrors; + int if_drv_flags; struct lro_head lro_active; struct lro_head lro_free; int lro_queued;