From: Luigi Rizzo <luigi@FreeBSD.org>
Date: Tue, 30 Apr 2013 16:08:34 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r250107 - head/sys/dev/netmap
Message-Id: <201304301608.r3UG8YhV097989@svn.freebsd.org>
X-SVN-Group: head
List-Id: SVN commit messages for the src tree for head/-current

Author: luigi
Date: Tue Apr 30 16:08:34 2013
New Revision: 250107
URL: http://svnweb.freebsd.org/changeset/base/250107

Log:
  Partial cleanup in preparation for upcoming changes:
  - netmap_rx_irq()/netmap_tx_irq() can now be called by FreeBSD drivers
    hiding the logic for handling NIC interrupts in netmap mode.
    This also simplifies the case of NICs attached to VALE switches.
    Individual drivers will be updated with separate commits.
  - use the same refcount() API for FreeBSD and linux
  - plus some comments, typos and formatting fixes

  Portions contributed by Michio Honda

Modified:
  head/sys/dev/netmap/netmap.c
  head/sys/dev/netmap/netmap_kern.h
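To illustrate the first log item, here is a minimal sketch of what a converted
per-queue interrupt handler could look like. The "foo" driver, its structures
and foo_rxeof()/foo_txeof() are hypothetical names used only for illustration;
only the netmap_rx_irq()/netmap_tx_irq() prototypes (declared in
dev/netmap/netmap_kern.h, see the patch below) are taken from this commit.

/*
 * Hypothetical "foo" driver, for illustration only.
 * Prototypes come from <dev/netmap/netmap_kern.h>.
 */
static void
foo_rxq_intr(void *arg)
{
	struct foo_rx_queue *rxq = arg;		/* made-up per-queue state */
	struct ifnet *ifp = rxq->sc->ifp;
	int work_done;

	/* In netmap mode this wakes up the ring's waiters and returns 1. */
	if (netmap_rx_irq(ifp, rxq->ring_id, &work_done))
		return;

	foo_rxeof(rxq);			/* normal RX processing */
}

static void
foo_txq_intr(void *arg)
{
	struct foo_tx_queue *txq = arg;
	struct ifnet *ifp = txq->sc->ifp;

	/* TX completions: same idea, no work_done counter is needed. */
	if (netmap_tx_irq(ifp, txq->ring_id))
		return;

	foo_txeof(txq);			/* normal TX cleanup */
}
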
Modified: head/sys/dev/netmap/netmap.c
==============================================================================
--- head/sys/dev/netmap/netmap.c	Tue Apr 30 16:00:21 2013	(r250106)
+++ head/sys/dev/netmap/netmap.c	Tue Apr 30 16:08:34 2013	(r250107)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -123,12 +123,10 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pen
 int netmap_drop = 0;	/* debugging */
 int netmap_flags = 0;	/* debug flags */
 int netmap_fwd = 0;	/* force transparent mode */
-int netmap_copy = 0;	/* debugging, copy content */
 SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
-SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , "");
 
 #ifdef NM_BRIDGE /* support for netmap bridge */
@@ -155,18 +153,27 @@ int netmap_bridge = NM_BDG_BATCH; /* bri
 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge, CTLFLAG_RW, &netmap_bridge, 0 , "");
 
 #ifdef linux
-#define	ADD_BDG_REF(ifp)	(NA(ifp)->if_refcount++)
-#define	DROP_BDG_REF(ifp)	(NA(ifp)->if_refcount-- <= 1)
+
+#define	refcount_acquire(_a)	atomic_add(1, (atomic_t *)_a)
+#define	refcount_release(_a)	atomic_dec_and_test((atomic_t *)_a)
+
 #else /* !linux */
-#define	ADD_BDG_REF(ifp)	(ifp)->if_refcount++
-#define	DROP_BDG_REF(ifp)	refcount_release(&(ifp)->if_refcount)
+
 #ifdef __FreeBSD__
 #include
 #include
 #endif /* __FreeBSD__ */
+
 #define prefetch(x)	__builtin_prefetch(x)
+
 #endif /* !linux */
 
+/*
+ * These are used to handle reference counters for bridge ports.
+ */
+#define	ADD_BDG_REF(ifp)	refcount_acquire(&NA(ifp)->na_bdg_refcount)
+#define	DROP_BDG_REF(ifp)	refcount_release(&NA(ifp)->na_bdg_refcount)
+
 static void bdg_netmap_attach(struct ifnet *ifp);
 static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
 /* per-tx-queue entry */
@@ -183,9 +190,14 @@ struct nm_hash_ent {
 };
 
 /*
- * Interfaces for a bridge are all in ports[].
+ * Interfaces for a bridge are all in bdg_ports[].
  * The array has fixed size, an empty entry does not terminate
- * the search.
+ * the search. But lookups only occur on attach/detach so we
+ * don't mind if they are slow.
+ *
+ * The bridge is non blocking on the transmit ports.
+ *
+ * bdg_lock protects accesses to the bdg_ports array.
  */
 struct nm_bridge {
 	struct ifnet *bdg_ports[NM_BDG_MAXPORTS];
@@ -1668,19 +1680,25 @@ netmap_attach(struct netmap_adapter *arg
 		ND("using default locks for %s", ifp->if_xname);
 		na->nm_lock = netmap_lock_wrapper;
 	}
+
 #ifdef linux
-	if (ifp->netdev_ops) {
-		ND("netdev_ops %p", ifp->netdev_ops);
-		/* prepare a clone of the netdev ops */
-		na->nm_ndo = *ifp->netdev_ops;
+	if (!ifp->netdev_ops) {
+		D("ouch, we cannot override netdev_ops");
+		goto fail;
 	}
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
+	/* if needed, prepare a clone of the entire netdev ops */
+	na->nm_ndo = *ifp->netdev_ops;
+#endif /* 2.6.28 and above */
 	na->nm_ndo.ndo_start_xmit = linux_netmap_start;
-#endif
+#endif /* linux */
+
 	D("success for %s", ifp->if_xname);
 	return 0;
 
fail:
 	D("fail, arg %p ifp %p na %p", arg, ifp, na);
+	netmap_detach(ifp);
 	return (na ? EINVAL : ENOMEM);
 }
 
@@ -1726,17 +1744,18 @@ netmap_start(struct ifnet *ifp, struct m
 	if (netmap_verbose & NM_VERB_HOST)
 		D("%s packet %d len %d from the stack", ifp->if_xname,
 			kring->nr_hwcur + kring->nr_hwavail, len);
+	if (len > NETMAP_BUF_SIZE) { /* too long for us */
+		D("%s from_host, drop packet size %d > %d", ifp->if_xname,
+			len, NETMAP_BUF_SIZE);
+		m_freem(m);
+		return EINVAL;
+	}
 	na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
 	if (kring->nr_hwavail >= lim) {
 		if (netmap_verbose)
 			D("stack ring %s full\n", ifp->if_xname);
 		goto done;	/* no space */
 	}
-	if (len > NETMAP_BUF_SIZE) {
-		D("%s from_host, drop packet size %d > %d", ifp->if_xname,
-			len, NETMAP_BUF_SIZE);
-		goto done;	/* too long for us */
-	}
 
 	/* compute the insert position */
 	i = kring->nr_hwcur + kring->nr_hwavail;
@@ -1837,6 +1856,10 @@ netmap_reset(struct netmap_adapter *na,
  *	N rings, separate locks:
  *	lock(i); wake(i); unlock(i); lock(core) wake(N+1) unlock(core)
  * work_done is non-null on the RX path.
+ *
+ * The 'q' argument also includes flag to tell whether the queue is
+ * already locked on enter, and whether it should remain locked on exit.
+ * This helps adapting to different defaults in drivers and OSes.
  */
 int
 netmap_rx_irq(struct ifnet *ifp, int q, int *work_done)
@@ -1844,9 +1867,14 @@ netmap_rx_irq(struct ifnet *ifp, int q,
 	struct netmap_adapter *na;
 	struct netmap_kring *r;
 	NM_SELINFO_T *main_wq;
+	int locktype, unlocktype, lock;
 
 	if (!(ifp->if_capenable & IFCAP_NETMAP))
 		return 0;
+
+	lock = q & (NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT);
+	q = q & NETMAP_RING_MASK;
+
 	ND(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
 	na = NA(ifp);
 	if (na->na_flags & NAF_SKIP_INTR) {
@@ -1856,32 +1884,42 @@ netmap_rx_irq(struct ifnet *ifp, int q,
 	if (work_done) { /* RX path */
 		if (q >= na->num_rx_rings)
-			return 0;	// regular queue
+			return 0;	// not a physical queue
 		r = na->rx_rings + q;
 		r->nr_kflags |= NKR_PENDINTR;
 		main_wq = (na->num_rx_rings > 1) ? &na->rx_si : NULL;
-	} else { /* tx path */
+		locktype = NETMAP_RX_LOCK;
+		unlocktype = NETMAP_RX_UNLOCK;
+	} else { /* TX path */
 		if (q >= na->num_tx_rings)
-			return 0;	// regular queue
+			return 0;	// not a physical queue
 		r = na->tx_rings + q;
 		main_wq = (na->num_tx_rings > 1) ? &na->tx_si : NULL;
 		work_done = &q; /* dummy */
+		locktype = NETMAP_TX_LOCK;
+		unlocktype = NETMAP_TX_UNLOCK;
 	}
 	if (na->separate_locks) {
-		mtx_lock(&r->q_lock);
+		if (!(lock & NETMAP_LOCKED_ENTER))
+			na->nm_lock(ifp, locktype, q);
 		selwakeuppri(&r->si, PI_NET);
-		mtx_unlock(&r->q_lock);
+		na->nm_lock(ifp, unlocktype, q);
 		if (main_wq) {
-			mtx_lock(&na->core_lock);
+			na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
 			selwakeuppri(main_wq, PI_NET);
-			mtx_unlock(&na->core_lock);
+			na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
 		}
+		/* lock the queue again if requested */
+		if (lock & NETMAP_LOCKED_EXIT)
+			na->nm_lock(ifp, locktype, q);
 	} else {
-		mtx_lock(&na->core_lock);
+		if (!(lock & NETMAP_LOCKED_ENTER))
+			na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
 		selwakeuppri(&r->si, PI_NET);
 		if (main_wq)
 			selwakeuppri(main_wq, PI_NET);
-		mtx_unlock(&na->core_lock);
+		if (!(lock & NETMAP_LOCKED_EXIT))
+			na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
 	}
 	*work_done = 1; /* do not fire napi again */
 	return 1;
@@ -1902,7 +1940,9 @@ netmap_rx_irq(struct ifnet *ifp, int q,
 static u_int
 linux_netmap_poll(struct file * file, struct poll_table_struct *pwait)
 {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+	int events = POLLIN | POLLOUT; /* XXX maybe... */
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
 	int events = pwait ? pwait->key : POLLIN | POLLOUT;
 #else /* in 3.4.0 field 'key' was renamed to '_key' */
 	int events = pwait ? pwait->_key : POLLIN | POLLOUT;
@@ -1942,7 +1982,7 @@ linux_netmap_mmap(struct file *f, struct
 	 * vtophys mapping in lut[k] so we use that, scanning
 	 * the lut[] array in steps of clustentries,
 	 * and we map each cluster (not individual pages,
-	 * it would be overkill).
+	 * it would be overkill -- XXX slow ? 20130415).
 	 */
 
 	/*

Modified: head/sys/dev/netmap/netmap_kern.h
==============================================================================
--- head/sys/dev/netmap/netmap_kern.h	Tue Apr 30 16:00:21 2013	(r250106)
+++ head/sys/dev/netmap/netmap_kern.h	Tue Apr 30 16:08:34 2013	(r250107)
@@ -210,10 +210,20 @@ struct netmap_adapter {
 	int (*nm_config)(struct ifnet *, u_int *txr, u_int *txd,
 					u_int *rxr, u_int *rxd);
 
+	/*
+	 * Bridge support:
+	 *
+	 * bdg_port is the port number used in the bridge;
+	 * na_bdg_refcount is a refcount used for bridge ports,
+	 *	when it goes to 0 we can detach+free this port
+	 *	(a bridge port is always attached if it exists;
+	 *	it is not always registered)
+	 */
 	int bdg_port;
+	int na_bdg_refcount;
+
 #ifdef linux
 	struct net_device_ops nm_ndo;
-	int if_refcount;	// XXX additions for bridge
 #endif /* linux */
 };
@@ -248,6 +258,10 @@ enum {
 #endif
 };
 
+/* How to handle locking support in netmap_rx_irq/netmap_tx_irq */
+#define	NETMAP_LOCKED_ENTER	0x10000000	/* already locked on enter */
+#define	NETMAP_LOCKED_EXIT	0x20000000	/* keep locked on exit */
+
 /*
  * The following are support routines used by individual drivers to
  * support netmap operation.
@@ -275,7 +289,7 @@ struct netmap_slot *netmap_reset(struct
 int netmap_ring_reinit(struct netmap_kring *);
 
 extern u_int netmap_buf_size;
-#define NETMAP_BUF_SIZE	netmap_buf_size
+#define NETMAP_BUF_SIZE	netmap_buf_size	// XXX remove
 extern int netmap_mitigate;
 extern int netmap_no_pendintr;
 extern u_int netmap_total_buffers;
@@ -437,7 +451,7 @@ netmap_idx_k2n(struct netmap_kring *kr,
 /* Entries of the look-up table. */
 struct lut_entry {
 	void *vaddr;		/* virtual address. */
-	vm_paddr_t paddr;	/* phisical address. */
+	vm_paddr_t paddr;	/* physical address. */
 };
 
 struct netmap_obj_pool;
@@ -470,6 +484,4 @@ PNMB(struct netmap_slot *slot, uint64_t
 int netmap_rx_irq(struct ifnet *, int, int *);
 #define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
-
-extern int netmap_copy;
 #endif /* _NET_NETMAP_KERN_H_ */
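
For drivers whose interrupt handlers already run with their per-queue lock held
(and expect it to still be held on return), the ring index passed to the helpers
can carry the new NETMAP_LOCKED_ENTER/NETMAP_LOCKED_EXIT flags defined above.
The following sketch again uses made-up "bar" driver names; only the calling
convention (flags or-ed into the queue argument) comes from this commit.

/* Hypothetical "bar" driver, for illustration only. */
static void
bar_txq_intr(struct bar_tx_queue *txq)
{
	struct ifnet *ifp = txq->sc->ifp;

	/* Entered with txq's lock held; netmap must keep it held on return. */
	if (netmap_tx_irq(ifp, txq->ring_id |
	    NETMAP_LOCKED_ENTER | NETMAP_LOCKED_EXIT))
		return;			/* netmap mode: nothing else to do */

	bar_txeof(txq);			/* normal path, still under the lock */
}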