Date: Fri, 10 May 2013 16:16:33 +0000 (UTC)
From: Luigi Rizzo <luigi@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject: svn commit: r250458 - in stable/9: . sys/dev/e1000 sys/dev/ixgbe sys/dev/netmap sys/dev/re sys/net tools/tools/netmap
Message-ID: <201305101616.r4AGGXm2047857@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: luigi Date: Fri May 10 16:16:33 2013 New Revision: 250458 URL: http://svnweb.freebsd.org/changeset/base/250458 Log: MFC: sync the version of netmap with the one in HEAD, including device drivers (mostly simplifying the code in the interrupt handlers). On passing, also merge r250414, which is related to netmap and the use of lem/em in virtual machines. Added: stable/9/tools/tools/netmap/nm_util.c (contents, props changed) stable/9/tools/tools/netmap/nm_util.h (contents, props changed) Modified: stable/9/README stable/9/sys/dev/e1000/if_em.c stable/9/sys/dev/e1000/if_em.h stable/9/sys/dev/e1000/if_igb.c stable/9/sys/dev/e1000/if_lem.c stable/9/sys/dev/e1000/if_lem.h stable/9/sys/dev/ixgbe/ixgbe.c stable/9/sys/dev/netmap/if_em_netmap.h stable/9/sys/dev/netmap/if_igb_netmap.h stable/9/sys/dev/netmap/if_lem_netmap.h stable/9/sys/dev/netmap/if_re_netmap.h stable/9/sys/dev/netmap/ixgbe_netmap.h stable/9/sys/dev/netmap/netmap.c stable/9/sys/dev/netmap/netmap_kern.h stable/9/sys/dev/netmap/netmap_mem2.c stable/9/sys/dev/re/if_re.c stable/9/sys/net/netmap.h stable/9/sys/net/netmap_user.h stable/9/tools/tools/netmap/Makefile stable/9/tools/tools/netmap/README stable/9/tools/tools/netmap/bridge.c stable/9/tools/tools/netmap/pcap.c stable/9/tools/tools/netmap/pkt-gen.c Modified: stable/9/README ============================================================================== --- stable/9/README Fri May 10 15:56:34 2013 (r250457) +++ stable/9/README Fri May 10 16:16:33 2013 (r250458) @@ -1,4 +1,4 @@ -This is the top level of the FreeBSD source directory. 
This file +$FreeBSD$ was last revised on: $FreeBSD$ Modified: stable/9/sys/dev/e1000/if_em.c ============================================================================== --- stable/9/sys/dev/e1000/if_em.c Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/e1000/if_em.c Fri May 10 16:16:33 2013 (r250458) @@ -335,6 +335,9 @@ MODULE_DEPEND(em, ether, 1, 1, 1); #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) #define M_TSO_LEN 66 +#define MAX_INTS_PER_SEC 8000 +#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) + /* Allow common code without TSO */ #ifndef CSUM_TSO #define CSUM_TSO 0 @@ -570,6 +573,11 @@ em_attach(device_t dev) &adapter->tx_abs_int_delay, E1000_REGISTER(hw, E1000_TADV), em_tx_abs_int_delay_dflt); + em_add_int_delay_sysctl(adapter, "itr", + "interrupt delay limit in usecs/4", + &adapter->tx_itr, + E1000_REGISTER(hw, E1000_ITR), + DEFAULT_ITR); /* Sysctl for limiting the amount of work done in the taskqueue */ em_set_sysctl_value(adapter, "rx_processing_limit", @@ -3803,17 +3811,9 @@ em_txeof(struct tx_ring *txr) EM_TX_LOCK_ASSERT(txr); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - - selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); - EM_TX_UNLOCK(txr); - EM_CORE_LOCK(adapter); - selwakeuppri(&na->tx_si, PI_NET); - EM_CORE_UNLOCK(adapter); - EM_TX_LOCK(txr); + if (netmap_tx_irq(ifp, txr->me | + (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT))) return; - } #endif /* DEV_NETMAP */ /* No work, make sure watchdog is off */ @@ -4254,8 +4254,6 @@ em_free_receive_buffers(struct rx_ring * * Enable receive unit. 
* **********************************************************************/ -#define MAX_INTS_PER_SEC 8000 -#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) static void em_initialize_receive_unit(struct adapter *adapter) @@ -4315,6 +4313,8 @@ em_initialize_receive_unit(struct adapte for (int i = 0; i < adapter->num_queues; i++, rxr++) { /* Setup the Base and Length of the Rx Descriptor Ring */ + u32 rdt = adapter->num_rx_desc - 1; /* default */ + bus_addr = rxr->rxdma.dma_paddr; E1000_WRITE_REG(hw, E1000_RDLEN(i), adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); @@ -4326,18 +4326,11 @@ em_initialize_receive_unit(struct adapte /* * an init() while a netmap client is active must * preserve the rx buffers passed to userspace. - * In this driver it means we adjust RDT to - * something different from na->num_rx_desc - 1. */ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[i]; - int t = na->num_rx_desc - 1 - kring->nr_hwavail; - - E1000_WRITE_REG(hw, E1000_RDT(i), t); - } else + if (ifp->if_capenable & IFCAP_NETMAP) + rdt -= NA(adapter->ifp)->rx_rings[i].nr_hwavail; #endif /* DEV_NETMAP */ - E1000_WRITE_REG(hw, E1000_RDT(i), adapter->num_rx_desc - 1); + E1000_WRITE_REG(hw, E1000_RDT(i), rdt); } /* Set PTHRESH for improved jumbo performance */ @@ -4414,17 +4407,8 @@ em_rxeof(struct rx_ring *rxr, int count, EM_RX_LOCK(rxr); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - - na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR; - selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); - EM_RX_UNLOCK(rxr); - EM_CORE_LOCK(adapter); - selwakeuppri(&na->rx_si, PI_NET); - EM_CORE_UNLOCK(adapter); - return (0); - } + if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed)) + return (FALSE); #endif /* DEV_NETMAP */ for (i = rxr->next_to_check, processed = 0; count != 0;) { @@ -5622,6 +5606,8 @@ em_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 
return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); + if (info->offset == E1000_ITR) /* units are 256ns here */ + ticks *= 4; adapter = info->adapter; Modified: stable/9/sys/dev/e1000/if_em.h ============================================================================== --- stable/9/sys/dev/e1000/if_em.h Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/e1000/if_em.h Fri May 10 16:16:33 2013 (r250458) @@ -429,6 +429,7 @@ struct adapter { struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; + struct em_int_delay_info tx_itr; /* Misc stats maintained by the driver */ unsigned long dropped_pkts; Modified: stable/9/sys/dev/e1000/if_igb.c ============================================================================== --- stable/9/sys/dev/e1000/if_igb.c Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/e1000/if_igb.c Fri May 10 16:16:33 2013 (r250458) @@ -3872,17 +3872,9 @@ igb_txeof(struct tx_ring *txr) IGB_TX_LOCK_ASSERT(txr); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - - selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); - IGB_TX_UNLOCK(txr); - IGB_CORE_LOCK(adapter); - selwakeuppri(&na->tx_si, PI_NET); - IGB_CORE_UNLOCK(adapter); - IGB_TX_LOCK(txr); - return FALSE; - } + if (netmap_tx_irq(ifp, txr->me | + (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT))) + return (FALSE); #endif /* DEV_NETMAP */ if (txr->tx_avail == adapter->num_tx_desc) { txr->queue_status = IGB_QUEUE_IDLE; @@ -4736,17 +4728,8 @@ igb_rxeof(struct igb_queue *que, int cou BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - - na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR; - selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); - IGB_RX_UNLOCK(rxr); - IGB_CORE_LOCK(adapter); - selwakeuppri(&na->rx_si, PI_NET); - IGB_CORE_UNLOCK(adapter); - return (0); - } + if 
(netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed)) + return (FALSE); #endif /* DEV_NETMAP */ /* Main clean loop */ Modified: stable/9/sys/dev/e1000/if_lem.c ============================================================================== --- stable/9/sys/dev/e1000/if_lem.c Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/e1000/if_lem.c Fri May 10 16:16:33 2013 (r250458) @@ -281,6 +281,9 @@ MODULE_DEPEND(lem, ether, 1, 1, 1); #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) +#define MAX_INTS_PER_SEC 8000 +#define DEFAULT_ITR (1000000000/(MAX_INTS_PER_SEC * 256)) + static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV); static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR); static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV); @@ -442,6 +445,11 @@ lem_attach(device_t dev) &adapter->tx_abs_int_delay, E1000_REGISTER(&adapter->hw, E1000_TADV), lem_tx_abs_int_delay_dflt); + lem_add_int_delay_sysctl(adapter, "itr", + "interrupt delay limit in usecs/4", + &adapter->tx_itr, + E1000_REGISTER(&adapter->hw, E1000_ITR), + DEFAULT_ITR); } /* Sysctls for limiting the amount of work done in the taskqueue */ @@ -1337,12 +1345,16 @@ lem_handle_rxtx(void *context, int pendi if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - lem_rxeof(adapter, adapter->rx_process_limit, NULL); + bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL); EM_TX_LOCK(adapter); lem_txeof(adapter); if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) lem_start_locked(ifp); EM_TX_UNLOCK(adapter); + if (more) { + taskqueue_enqueue(adapter->tq, &adapter->rxtx_task); + return; + } } if (ifp->if_drv_flags & IFF_DRV_RUNNING) @@ -2955,10 +2967,8 @@ lem_txeof(struct adapter *adapter) EM_TX_LOCK_ASSERT(adapter); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - selwakeuppri(&NA(ifp)->tx_rings[0].si, PI_NET); + if (netmap_tx_irq(ifp, 0 | (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT))) return; - } 
#endif /* DEV_NETMAP */ if (adapter->num_tx_desc_avail == adapter->num_tx_desc) return; @@ -3246,8 +3256,6 @@ lem_setup_receive_structures(struct adap * Enable receive unit. * **********************************************************************/ -#define MAX_INTS_PER_SEC 8000 -#define DEFAULT_ITR 1000000000/(MAX_INTS_PER_SEC * 256) static void lem_initialize_receive_unit(struct adapter *adapter) @@ -3338,19 +3346,13 @@ lem_initialize_receive_unit(struct adapt * Tail Descriptor Pointers */ E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0); + rctl = adapter->num_rx_desc - 1; /* default RDT value */ #ifdef DEV_NETMAP /* preserve buffers already made available to clients */ - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(adapter->ifp); - struct netmap_kring *kring = &na->rx_rings[0]; - int t = na->num_rx_desc - 1 - kring->nr_hwavail; - - if (t >= na->num_rx_desc) - t -= na->num_rx_desc; - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), t); - } else + if (ifp->if_capenable & IFCAP_NETMAP) + rctl -= NA(adapter->ifp)->rx_rings[0].nr_hwavail; #endif /* DEV_NETMAP */ - E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), adapter->num_rx_desc - 1); + E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl); return; } @@ -3434,13 +3436,8 @@ lem_rxeof(struct adapter *adapter, int c BUS_DMASYNC_POSTREAD); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - struct netmap_adapter *na = NA(ifp); - na->rx_rings[0].nr_kflags |= NKR_PENDINTR; - selwakeuppri(&na->rx_rings[0].si, PI_NET); - EM_RX_UNLOCK(adapter); - return (0); - } + if (netmap_rx_irq(ifp, 0 | NETMAP_LOCKED_ENTER, &rx_sent)) + return (FALSE); #endif /* DEV_NETMAP */ if (!((current_desc->status) & E1000_RXD_STAT_DD)) { @@ -4584,6 +4581,8 @@ lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS return (EINVAL); info->value = usecs; ticks = EM_USECS_TO_TICKS(usecs); + if (info->offset == E1000_ITR) /* units are 256ns here */ + ticks *= 4; adapter = info->adapter; Modified: stable/9/sys/dev/e1000/if_lem.h 
============================================================================== --- stable/9/sys/dev/e1000/if_lem.h Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/e1000/if_lem.h Fri May 10 16:16:33 2013 (r250458) @@ -363,6 +363,7 @@ struct adapter { struct em_int_delay_info tx_abs_int_delay; struct em_int_delay_info rx_int_delay; struct em_int_delay_info rx_abs_int_delay; + struct em_int_delay_info tx_itr; /* * Transmit definitions Modified: stable/9/sys/dev/ixgbe/ixgbe.c ============================================================================== --- stable/9/sys/dev/ixgbe/ixgbe.c Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/ixgbe/ixgbe.c Fri May 10 16:16:33 2013 (r250458) @@ -3573,13 +3573,8 @@ ixgbe_txeof(struct tx_ring *txr) if (!netmap_mitigate || (kring->nr_kflags < kring->nkr_num_slots && txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) { - kring->nr_kflags = kring->nkr_num_slots; - selwakeuppri(&na->tx_rings[txr->me].si, PI_NET); - IXGBE_TX_UNLOCK(txr); - IXGBE_CORE_LOCK(adapter); - selwakeuppri(&na->tx_si, PI_NET); - IXGBE_CORE_UNLOCK(adapter); - IXGBE_TX_LOCK(txr); + netmap_tx_irq(ifp, txr->me | + (NETMAP_LOCKED_ENTER|NETMAP_LOCKED_EXIT)); } return FALSE; } @@ -4364,23 +4359,9 @@ ixgbe_rxeof(struct ix_queue *que) IXGBE_RX_LOCK(rxr); #ifdef DEV_NETMAP - if (ifp->if_capenable & IFCAP_NETMAP) { - /* - * Same as the txeof routine: only wakeup clients on intr. - * NKR_PENDINTR in nr_kflags is used to implement interrupt - * mitigation (ixgbe_rxsync() will not look for new packets - * unless NKR_PENDINTR is set). - */ - struct netmap_adapter *na = NA(ifp); - - na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR; - selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET); - IXGBE_RX_UNLOCK(rxr); - IXGBE_CORE_LOCK(adapter); - selwakeuppri(&na->rx_si, PI_NET); - IXGBE_CORE_UNLOCK(adapter); + /* Same as the txeof routine: wakeup clients on intr. 
*/ + if (netmap_rx_irq(ifp, rxr->me | NETMAP_LOCKED_ENTER, &processed)) return (FALSE); - } #endif /* DEV_NETMAP */ for (i = rxr->next_to_check; count != 0;) { struct mbuf *sendmp, *mp; Modified: stable/9/sys/dev/netmap/if_em_netmap.h ============================================================================== --- stable/9/sys/dev/netmap/if_em_netmap.h Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/netmap/if_em_netmap.h Fri May 10 16:16:33 2013 (r250458) @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: if_em_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * netmap support for em. * @@ -277,7 +276,7 @@ em_netmap_rxsync(struct ifnet *ifp, u_in k = ring->cur; if (k > lim) return netmap_ring_reinit(kring); - + if (do_lock) EM_RX_LOCK(rxr); Modified: stable/9/sys/dev/netmap/if_igb_netmap.h ============================================================================== --- stable/9/sys/dev/netmap/if_igb_netmap.h Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/netmap/if_igb_netmap.h Fri May 10 16:16:33 2013 (r250458) @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: if_igb_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * Netmap support for igb, partly contributed by Ahmed Kooli * For details on netmap support please see ixgbe_netmap.h Modified: stable/9/sys/dev/netmap/if_lem_netmap.h ============================================================================== --- stable/9/sys/dev/netmap/if_lem_netmap.h Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/netmap/if_lem_netmap.h Fri May 10 16:16:33 2013 (r250458) @@ -26,7 +26,6 @@ /* * $FreeBSD$ - * $Id: if_lem_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * netmap support for "lem" * Modified: stable/9/sys/dev/netmap/if_re_netmap.h ============================================================================== --- stable/9/sys/dev/netmap/if_re_netmap.h Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/netmap/if_re_netmap.h Fri May 10 16:16:33 2013 (r250458) @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: if_re_netmap.h 
10609 2012-02-22 19:44:58Z luigi $ * * netmap support for "re" * For details on netmap support please see ixgbe_netmap.h @@ -151,7 +150,7 @@ re_netmap_txsync(struct ifnet *ifp, u_in /* update avail to what the kernel knows */ ring->avail = kring->nr_hwavail; - + j = kring->nr_hwcur; if (j != k) { /* we have new packets to send */ l = sc->rl_ldata.rl_tx_prodidx; @@ -170,7 +169,7 @@ re_netmap_txsync(struct ifnet *ifp, u_in // XXX what about prodidx ? return netmap_ring_reinit(kring); } - + if (l == lim) /* mark end of ring */ cmd |= RL_TDESC_CMD_EOR; @@ -335,7 +334,7 @@ re_netmap_rxsync(struct ifnet *ifp, u_in */ static void re_netmap_tx_init(struct rl_softc *sc) -{ +{ struct rl_txdesc *txd; struct rl_desc *desc; int i, n; Modified: stable/9/sys/dev/netmap/ixgbe_netmap.h ============================================================================== --- stable/9/sys/dev/netmap/ixgbe_netmap.h Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/netmap/ixgbe_netmap.h Fri May 10 16:16:33 2013 (r250458) @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: ixgbe_netmap.h 10627 2012-02-23 19:37:15Z luigi $ * * netmap modifications for ixgbe * @@ -226,7 +225,8 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->tx_rings[ring_nr]; struct netmap_ring *ring = kring->ring; - u_int j, k = ring->cur, l, n = 0, lim = kring->nkr_num_slots - 1; + u_int j, l, n = 0; + u_int const k = ring->cur, lim = kring->nkr_num_slots - 1; /* * ixgbe can generate an interrupt on every tx packet, but it @@ -393,11 +393,10 @@ ring_reset: if (ix_use_dd) { struct ixgbe_legacy_tx_desc *txd = (struct ixgbe_legacy_tx_desc *)txr->tx_base; - + u_int k1 = netmap_idx_k2n(kring, kring->nr_hwcur); l = txr->next_to_clean; - k = netmap_idx_k2n(kring, kring->nr_hwcur); delta = 0; - while (l != k && + while (l != k1 && txd[l].upper.fields.status & IXGBE_TXD_STAT_DD) { delta++; l = (l == lim) ? 
0 : l + 1; Modified: stable/9/sys/dev/netmap/netmap.c ============================================================================== --- stable/9/sys/dev/netmap/netmap.c Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/netmap/netmap.c Fri May 10 16:16:33 2013 (r250458) @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mman.h> /* PROT_EXEC */ #include <sys/poll.h> #include <sys/proc.h> +#include <sys/rwlock.h> #include <vm/vm.h> /* vtophys */ #include <vm/pmap.h> /* vtophys */ #include <sys/socket.h> /* sockaddrs */ @@ -98,6 +99,7 @@ MALLOC_DEFINE(M_NETMAP, "netmap", "Netwo #include <net/netmap.h> #include <dev/netmap/netmap_kern.h> +/* XXX the following variables must be deprecated and included in nm_mem */ u_int netmap_total_buffers; u_int netmap_buf_size; char *netmap_buffer_base; /* address of an invalid buffer */ @@ -121,12 +123,10 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pen int netmap_drop = 0; /* debugging */ int netmap_flags = 0; /* debug flags */ int netmap_fwd = 0; /* force transparent mode */ -int netmap_copy = 0; /* debugging, copy content */ SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , ""); SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , ""); #ifdef NM_BRIDGE /* support for netmap bridge */ @@ -147,22 +147,33 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, copy, #define NM_BDG_HASH 1024 /* forwarding table entries */ #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ #define NM_BRIDGES 4 /* number of bridges */ + + int netmap_bridge = 
NM_BDG_BATCH; /* bridge batch size */ SYSCTL_INT(_dev_netmap, OID_AUTO, bridge, CTLFLAG_RW, &netmap_bridge, 0 , ""); #ifdef linux -#define ADD_BDG_REF(ifp) (NA(ifp)->if_refcount++) -#define DROP_BDG_REF(ifp) (NA(ifp)->if_refcount-- <= 1) + +#define refcount_acquire(_a) atomic_add(1, (atomic_t *)_a) +#define refcount_release(_a) atomic_dec_and_test((atomic_t *)_a) + #else /* !linux */ -#define ADD_BDG_REF(ifp) (ifp)->if_refcount++ -#define DROP_BDG_REF(ifp) refcount_release(&(ifp)->if_refcount) + #ifdef __FreeBSD__ #include <sys/endian.h> #include <sys/refcount.h> #endif /* __FreeBSD__ */ + #define prefetch(x) __builtin_prefetch(x) + #endif /* !linux */ +/* + * These are used to handle reference counters for bridge ports. + */ +#define ADD_BDG_REF(ifp) refcount_acquire(&NA(ifp)->na_bdg_refcount) +#define DROP_BDG_REF(ifp) refcount_release(&NA(ifp)->na_bdg_refcount) + static void bdg_netmap_attach(struct ifnet *ifp); static int bdg_netmap_reg(struct ifnet *ifp, int onoff); /* per-tx-queue entry */ @@ -179,9 +190,14 @@ struct nm_hash_ent { }; /* - * Interfaces for a bridge are all in ports[]. + * Interfaces for a bridge are all in bdg_ports[]. * The array has fixed size, an empty entry does not terminate - * the search. + * the search. But lookups only occur on attach/detach so we + * don't mind if they are slow. + * + * The bridge is non blocking on the transmit ports. + * + * bdg_lock protects accesses to the bdg_ports array. 
*/ struct nm_bridge { struct ifnet *bdg_ports[NM_BDG_MAXPORTS]; @@ -297,7 +313,7 @@ netmap_update_config(struct netmap_adapt txd = na->num_tx_desc; rxr = na->num_rx_rings; rxd = na->num_rx_desc; - } + } if (na->num_tx_rings == txr && na->num_tx_desc == txd && na->num_rx_rings == rxr && na->num_rx_desc == rxd) @@ -323,11 +339,7 @@ netmap_update_config(struct netmap_adapt } /*------------- memory allocator -----------------*/ -#ifdef NETMAP_MEM2 #include "netmap_mem2.c" -#else /* !NETMAP_MEM2 */ -#include "netmap_mem1.c" -#endif /* !NETMAP_MEM2 */ /*------------ end of memory allocator ----------*/ @@ -497,16 +509,16 @@ netmap_dtor(void *data) { struct netmap_priv_d *priv = data; struct ifnet *ifp = priv->np_ifp; - struct netmap_adapter *na; NMA_LOCK(); if (ifp) { - na = NA(ifp); + struct netmap_adapter *na = NA(ifp); + na->nm_lock(ifp, NETMAP_REG_LOCK, 0); netmap_dtor_locked(data); na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0); - nm_if_rele(ifp); + nm_if_rele(ifp); /* might also destroy *na */ } if (priv->ref_done) { netmap_memory_deref(); @@ -1668,19 +1680,25 @@ netmap_attach(struct netmap_adapter *arg ND("using default locks for %s", ifp->if_xname); na->nm_lock = netmap_lock_wrapper; } + #ifdef linux - if (ifp->netdev_ops) { - ND("netdev_ops %p", ifp->netdev_ops); - /* prepare a clone of the netdev ops */ - na->nm_ndo = *ifp->netdev_ops; + if (!ifp->netdev_ops) { + D("ouch, we cannot override netdev_ops"); + goto fail; } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28) + /* if needed, prepare a clone of the entire netdev ops */ + na->nm_ndo = *ifp->netdev_ops; +#endif /* 2.6.28 and above */ na->nm_ndo.ndo_start_xmit = linux_netmap_start; -#endif +#endif /* linux */ + D("success for %s", ifp->if_xname); return 0; fail: D("fail, arg %p ifp %p na %p", arg, ifp, na); + netmap_detach(ifp); return (na ? 
EINVAL : ENOMEM); } @@ -1726,17 +1744,18 @@ netmap_start(struct ifnet *ifp, struct m if (netmap_verbose & NM_VERB_HOST) D("%s packet %d len %d from the stack", ifp->if_xname, kring->nr_hwcur + kring->nr_hwavail, len); + if (len > NETMAP_BUF_SIZE) { /* too long for us */ + D("%s from_host, drop packet size %d > %d", ifp->if_xname, + len, NETMAP_BUF_SIZE); + m_freem(m); + return EINVAL; + } na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); if (kring->nr_hwavail >= lim) { if (netmap_verbose) D("stack ring %s full\n", ifp->if_xname); goto done; /* no space */ } - if (len > NETMAP_BUF_SIZE) { - D("%s from_host, drop packet size %d > %d", ifp->if_xname, - len, NETMAP_BUF_SIZE); - goto done; /* too long for us */ - } /* compute the insert position */ i = kring->nr_hwcur + kring->nr_hwavail; @@ -1837,6 +1856,10 @@ netmap_reset(struct netmap_adapter *na, * N rings, separate locks: * lock(i); wake(i); unlock(i); lock(core) wake(N+1) unlock(core) * work_done is non-null on the RX path. + * + * The 'q' argument also includes flag to tell whether the queue is + * already locked on enter, and whether it should remain locked on exit. + * This helps adapting to different defaults in drivers and OSes. */ int netmap_rx_irq(struct ifnet *ifp, int q, int *work_done) @@ -1844,9 +1867,14 @@ netmap_rx_irq(struct ifnet *ifp, int q, struct netmap_adapter *na; struct netmap_kring *r; NM_SELINFO_T *main_wq; + int locktype, unlocktype, lock; if (!(ifp->if_capenable & IFCAP_NETMAP)) return 0; + + lock = q & (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT); + q = q & NETMAP_RING_MASK; + ND(5, "received %s queue %d", work_done ? "RX" : "TX" , q); na = NA(ifp); if (na->na_flags & NAF_SKIP_INTR) { @@ -1856,32 +1884,42 @@ netmap_rx_irq(struct ifnet *ifp, int q, if (work_done) { /* RX path */ if (q >= na->num_rx_rings) - return 0; // regular queue + return 0; // not a physical queue r = na->rx_rings + q; r->nr_kflags |= NKR_PENDINTR; main_wq = (na->num_rx_rings > 1) ? 
&na->rx_si : NULL; - } else { /* tx path */ + locktype = NETMAP_RX_LOCK; + unlocktype = NETMAP_RX_UNLOCK; + } else { /* TX path */ if (q >= na->num_tx_rings) - return 0; // regular queue + return 0; // not a physical queue r = na->tx_rings + q; main_wq = (na->num_tx_rings > 1) ? &na->tx_si : NULL; work_done = &q; /* dummy */ + locktype = NETMAP_TX_LOCK; + unlocktype = NETMAP_TX_UNLOCK; } if (na->separate_locks) { - mtx_lock(&r->q_lock); + if (!(lock & NETMAP_LOCKED_ENTER)) + na->nm_lock(ifp, locktype, q); selwakeuppri(&r->si, PI_NET); - mtx_unlock(&r->q_lock); + na->nm_lock(ifp, unlocktype, q); if (main_wq) { - mtx_lock(&na->core_lock); + na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); selwakeuppri(main_wq, PI_NET); - mtx_unlock(&na->core_lock); + na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0); } + /* lock the queue again if requested */ + if (lock & NETMAP_LOCKED_EXIT) + na->nm_lock(ifp, locktype, q); } else { - mtx_lock(&na->core_lock); + if (!(lock & NETMAP_LOCKED_ENTER)) + na->nm_lock(ifp, NETMAP_CORE_LOCK, 0); selwakeuppri(&r->si, PI_NET); if (main_wq) selwakeuppri(main_wq, PI_NET); - mtx_unlock(&na->core_lock); + if (!(lock & NETMAP_LOCKED_EXIT)) + na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0); } *work_done = 1; /* do not fire napi again */ return 1; @@ -1902,7 +1940,9 @@ netmap_rx_irq(struct ifnet *ifp, int q, static u_int linux_netmap_poll(struct file * file, struct poll_table_struct *pwait) { -#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + int events = POLLIN | POLLOUT; /* XXX maybe... */ +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) int events = pwait ? pwait->key : POLLIN | POLLOUT; #else /* in 3.4.0 field 'key' was renamed to '_key' */ int events = pwait ? pwait->_key : POLLIN | POLLOUT; @@ -1942,7 +1982,7 @@ linux_netmap_mmap(struct file *f, struct * vtophys mapping in lut[k] so we use that, scanning * the lut[] array in steps of clustentries, * and we map each cluster (not individual pages, - * it would be overkill). 
+ * it would be overkill -- XXX slow ? 20130415). */ /* Modified: stable/9/sys/dev/netmap/netmap_kern.h ============================================================================== --- stable/9/sys/dev/netmap/netmap_kern.h Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/netmap/netmap_kern.h Fri May 10 16:16:33 2013 (r250458) @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved. + * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -25,7 +25,6 @@ /* * $FreeBSD$ - * $Id: netmap_kern.h 11829 2012-09-26 04:06:34Z luigi $ * * The header contains the definitions of constants and function * prototypes used only in kernelspace. @@ -34,9 +33,8 @@ #ifndef _NET_NETMAP_KERN_H_ #define _NET_NETMAP_KERN_H_ -#define NETMAP_MEM2 // use the new memory allocator - #if defined(__FreeBSD__) + #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) @@ -44,8 +42,10 @@ #define NM_SELINFO_T struct selinfo #define MBUF_LEN(m) ((m)->m_pkthdr.len) #define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m) + #elif defined (linux) -#define NM_LOCK_T spinlock_t + +#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h #define NM_SELINFO_T wait_queue_head_t #define MBUF_LEN(m) ((m)->len) #define NM_SEND_UP(ifp, m) netif_rx(m) @@ -67,6 +67,7 @@ #endif #elif defined (__APPLE__) + #warning apple support is incomplete. #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) @@ -76,8 +77,10 @@ #define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m) #else + #error unsupported platform -#endif + +#endif /* end - platform-specific code */ #define ND(format, ...) #define D(format, ...) 
\ @@ -207,10 +210,20 @@ struct netmap_adapter { int (*nm_config)(struct ifnet *, u_int *txr, u_int *txd, u_int *rxr, u_int *rxd); + /* + * Bridge support: + * + * bdg_port is the port number used in the bridge; + * na_bdg_refcount is a refcount used for bridge ports, + * when it goes to 0 we can detach+free this port + * (a bridge port is always attached if it exists; + * it is not always registered) + */ int bdg_port; + int na_bdg_refcount; + #ifdef linux struct net_device_ops nm_ndo; - int if_refcount; // XXX additions for bridge #endif /* linux */ }; @@ -245,6 +258,10 @@ enum { #endif }; +/* How to handle locking support in netmap_rx_irq/netmap_tx_irq */ +#define NETMAP_LOCKED_ENTER 0x10000000 /* already locked on enter */ +#define NETMAP_LOCKED_EXIT 0x20000000 /* keep locked on exit */ + /* * The following are support routines used by individual drivers to * support netmap operation. @@ -272,7 +289,7 @@ struct netmap_slot *netmap_reset(struct int netmap_ring_reinit(struct netmap_kring *); extern u_int netmap_buf_size; -#define NETMAP_BUF_SIZE netmap_buf_size +#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove extern int netmap_mitigate; extern int netmap_no_pendintr; extern u_int netmap_total_buffers; @@ -431,20 +448,16 @@ netmap_idx_k2n(struct netmap_kring *kr, } -#ifdef NETMAP_MEM2 /* Entries of the look-up table. */ struct lut_entry { void *vaddr; /* virtual address. */ - vm_paddr_t paddr; /* phisical address. */ + vm_paddr_t paddr; /* physical address. */ }; struct netmap_obj_pool; extern struct lut_entry *netmap_buffer_lut; #define NMB_VA(i) (netmap_buffer_lut[i].vaddr) #define NMB_PA(i) (netmap_buffer_lut[i].paddr) -#else /* NETMAP_MEM1 */ -#define NMB_VA(i) (netmap_buffer_base + (i * NETMAP_BUF_SIZE) ) -#endif /* NETMAP_MEM2 */ /* * NMB return the virtual address of a buffer (buffer 0 on bad index) @@ -462,11 +475,8 @@ PNMB(struct netmap_slot *slot, uint64_t { uint32_t i = slot->buf_idx; void *ret = (i >= netmap_total_buffers) ? 
NMB_VA(0) : NMB_VA(i); -#ifdef NETMAP_MEM2 + *pp = (i >= netmap_total_buffers) ? NMB_PA(0) : NMB_PA(i); -#else - *pp = vtophys(ret); -#endif return ret; } @@ -474,5 +484,4 @@ PNMB(struct netmap_slot *slot, uint64_t int netmap_rx_irq(struct ifnet *, int, int *); #define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL) -extern int netmap_copy; #endif /* _NET_NETMAP_KERN_H_ */ Modified: stable/9/sys/dev/netmap/netmap_mem2.c ============================================================================== --- stable/9/sys/dev/netmap/netmap_mem2.c Fri May 10 15:56:34 2013 (r250457) +++ stable/9/sys/dev/netmap/netmap_mem2.c Fri May 10 16:16:33 2013 (r250458) @@ -1,5 +1,5 @@ /* - * Copyright (C) 2012 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. + * Copyright (C) 2012-2013 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -25,22 +25,23 @@ /* * $FreeBSD$ - * $Id: netmap_mem2.c 11881 2012-10-18 23:24:15Z luigi $ * * (New) memory allocator for netmap */ /* - * This allocator creates three memory regions: + * This allocator creates three memory pools: * nm_if_pool for the struct netmap_if * nm_ring_pool for the struct netmap_ring * nm_buf_pool for the packet buffers. * - * All regions need to be multiple of a page size as we export them to - * userspace through mmap. Only the latter needs to be dma-able, + * that contain netmap objects. Each pool is made of a number of clusters, + * multiple of a page size, each containing an integer number of objects. + * The clusters are contiguous in user space but not in the kernel. + * Only nm_buf_pool needs to be dma-able, * but for convenience use the same type of allocator for all. * - * Once mapped, the three regions are exported to userspace + * Once mapped, the three pools are exported to userspace * as a contiguous block, starting from nm_if_pool. 
Each * cluster (and pool) is an integral number of pages. * [ . . . ][ . . . . . .][ . . . . . . . . . .] @@ -56,7 +57,7 @@ * The pool is split into smaller clusters, whose size is a * multiple of the page size. The cluster size is chosen * to minimize the waste for a given max cluster size - * (we do it by brute force, as we have relatively few object + * (we do it by brute force, as we have relatively few objects * per cluster). * * Objects are aligned to the cache line (64 bytes) rounding up object @@ -80,7 +81,7 @@ * In the worst case we have one netmap_if per ring in the system. * * struct netmap_ring - * variable too, 8 byte per slot plus some fixed amount. + * variable size, 8 byte per slot plus some fixed amount. * Rings can be large (e.g. 4k slots, or >32Kbytes). * We default to 36 KB (9 pages), and a few hundred rings. * @@ -93,16 +94,14 @@ * the size to multiple of 1K or so. Default to 2K */ -#ifndef CONSERVATIVE #define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */ -#else /* CONSERVATIVE */ -#define NETMAP_BUF_MAX_NUM 20000 /* 40MB */ -#endif #ifdef linux +// XXX a mtx would suffice here 20130415 lr +// #define NMA_LOCK_T safe_spinlock_t #define NMA_LOCK_T struct semaphore #define NMA_LOCK_INIT() sema_init(&nm_mem.nm_mtx, 1) -#define NMA_LOCK_DESTROY() +#define NMA_LOCK_DESTROY() #define NMA_LOCK() down(&nm_mem.nm_mtx) #define NMA_UNLOCK() up(&nm_mem.nm_mtx) #else /* !linux */ @@ -178,7 +177,11 @@ struct netmap_mem_d { struct netmap_obj_pool pools[NETMAP_POOLS_NR]; }; - +/* + * nm_mem is the memory allocator used for all physical interfaces + * running in netmap mode. + * Virtual (VALE) ports will have each its own allocator. + */ static struct netmap_mem_d nm_mem = { /* Our memory allocator. 
*/ .pools = { [NETMAP_IF_POOL] = { @@ -205,6 +208,7 @@ static struct netmap_mem_d nm_mem = { /* }, }; +// XXX logically belongs to nm_mem struct lut_entry *netmap_buffer_lut; /* exported */ /* memory allocator related sysctls */ @@ -212,12 +216,10 @@ struct lut_entry *netmap_buffer_lut; /* #define STRINGIFY(x) #x #define DECLARE_SYSCTLS(id, name) \ - /* TUNABLE_INT("hw.netmap." STRINGIFY(name) "_size", &netmap_params[id].size); */ \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \ CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \ CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \ - /* TUNABLE_INT("hw.netmap." STRINGIFY(name) "_num", &netmap_params[id].num); */ \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \ CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \ SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \ @@ -228,14 +230,12 @@ DECLARE_SYSCTLS(NETMAP_RING_POOL, ring); DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf); /* - * Convert a userspace offset to a phisical address. - * XXX re-do in a simpler way. + * Convert a userspace offset to a physical address. + * XXX only called in the FreeBSD's netmap_mmap() + * because in linux we map everything at once. * *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201305101616.r4AGGXm2047857>