Date: Tue, 13 Sep 2011 14:36:53 -0400 From: Arnaud Lacombe <lacombar@gmail.com> To: Jack Vogel <jfvogel@gmail.com> Cc: freebsd-net@freebsd.org Subject: Re: FreeBSD 7-STABLE mbuf corruption Message-ID: <CACqU3MV7JRxQ_mNeHCk7RVyzETZLAcc3XL=xyZ-qqtPfRxkZeQ@mail.gmail.com> In-Reply-To: <CAFOYbc=mu7rGU8LudLSzZwKfM3QHFw+PGEHHKD3rcA2=dxGfoQ@mail.gmail.com> References: <CACqU3MUs9Z9GeuGe=8iVp=MWV6eG-tO+kHb1znatsTq2uEqwvA@mail.gmail.com> <CACqU3MXf52tLajTfVCEiGGhtCuXsesrdM65LfsoGecuZj2tNwA@mail.gmail.com> <CAFOYbc=mu7rGU8LudLSzZwKfM3QHFw+PGEHHKD3rcA2=dxGfoQ@mail.gmail.com>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
Hi,
On Wed, Sep 7, 2011 at 7:57 PM, Jack Vogel <jfvogel@gmail.com> wrote:
> I have seen this, but I don't have any hot ideas right off the top of my
> head yet :(
>
I've been running for 19h now the following patches:
- backport of kmacy@'s buf_ring(9) API, from FreeBSD 8 (from [0], see
attachment for full diff)
- conversion of igb(4), from CURRENT, to use buf_ring(9) on FreeBSD
7.1 (see attachment)
- all the original patches I already sent
It did not crash, yet. The only downside is that after between 3h30 and ~4h,
igb(4) queues' handler started spinning infinitely, breaking network
connectivity.
I would be tempted to say that the infinite loop issue is an igb(4)
issue (separate from the original crashes), and to link the crashes I was
seeing to a race in the legacy IFQ code...
- Arnaud
[0]: roughly, a cherry-pick of r185162, r185164, r185193, r185543,
r186207, r186213, r191033, r191161, r191899, r193848 and r194518.
> Jack
>
>
> On Wed, Sep 7, 2011 at 4:19 PM, Arnaud Lacombe <lacombar@gmail.com> wrote:
>>
>> Hi,
>>
>> On Mon, Sep 5, 2011 at 2:59 AM, Arnaud Lacombe <lacombar@gmail.com> wrote:
>> > Hi folks,
>> >
>> > We have been trying to track down a bad mbuf management for about two
>> > weeks on a customized 7.1 base. I have finally been able to reproduce
>> > it with a stock FreeBSD 7-STABLE (kernel from r225276, userland from
>> > 7.4).
>> >
>> > With the help of the attached patches, I have just been able to
>> > trigger the following panic:
>> >
>> > panic: Corrupted unused flags, expected 0xffffffff00000000, got 0x0,
>> > flags 0x3
>> > cpuid = 1
>> > Uptime: 3d10h5m3s
>> > Cannot dump. No dump device defined
>> >
>> General form of the crash is:
>>
>> panic: Corrupted unused flags, expected 0xffffffff00000000, got
>> 0xbabe0000000000, flags 0xbabe0000babe00
>> cpuid = 0
>> KDB: stack backtrace:
>> db_trace_self_wrapper(c0874e29,0,c0835757,f4574c48,0,...) at
>> db_trace_self_wrapper+0x26
>> panic(c0835757,0,ffffffff,0,babe00,...) at panic+0x10b
>> igb_txeof(c6a25008,0,c0837083,5ea,17c,...) at igb_txeof+0x399
>> igb_msix_que(c6a2b800,0,c084d367,4b6,c69dd068,...) at igb_msix_que+0x7b
>> ithread_loop(c6a29090,f4574d38,c084d0db,31c,c6a16828,...) at
>> ithread_loop+0xc3
>> fork_exit(c061d520,c6a29090,f4574d38) at fork_exit+0xa6
>> fork_trampoline() at fork_trampoline+0x8
>> --- trap 0, eip = 0, esp = 0xf4574d70, ebp = 0 ---
>> Uptime: 1m42s
>>
>> It happens particularly easily when the box receives wall of SYN
>> (about 1000 cnx attempts at once) every 5s or so.
>>
>> - Arnaud
>>
>> >
>> > [cut stuff no one cares about...]
>
>
[-- Attachment #2 --]
diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
index 52c90c9..a9971cd 100644
--- a/sys/amd64/include/atomic.h
+++ b/sys/amd64/include/atomic.h
@@ -32,6 +32,10 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
+#define mb() __asm__ __volatile__ ("mfence;": : :"memory")
+#define wmb() __asm__ __volatile__ ("sfence;": : :"memory")
+#define rmb() __asm__ __volatile__ ("lfence;": : :"memory")
+
/*
* Various simple operations on memory, each of which is atomic in the
* presence of interrupts and multiple processors.
diff --git a/sys/arm/include/atomic.h b/sys/arm/include/atomic.h
index bed5a72..f0be3ae 100644
--- a/sys/arm/include/atomic.h
+++ b/sys/arm/include/atomic.h
@@ -45,6 +45,11 @@
#include <sys/types.h>
+
+#define mb()
+#define wmb()
+#define rmb()
+
#ifndef I32_bit
#define I32_bit (1 << 7) /* IRQ disable */
#endif
diff --git a/sys/conf/files b/sys/conf/files
index 8226e11..d5351c1 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1676,6 +1676,7 @@ kern/subr_acl_posix1e.c standard
kern/subr_autoconf.c standard
kern/subr_blist.c standard
kern/subr_bus.c standard
+kern/subr_bufring.c standard
kern/subr_clock.c standard
kern/subr_devstat.c standard
kern/subr_disk.c standard
diff --git a/sys/dev/bce/if_bcereg.h b/sys/dev/bce/if_bcereg.h
index 723fd26..2bb589f 100644
--- a/sys/dev/bce/if_bcereg.h
+++ b/sys/dev/bce/if_bcereg.h
@@ -541,6 +541,8 @@ default: DBPRINT(sc, BCE_INSANE_PHY, \
#endif /* BCE_DEBUG */
+
+#if __FreeBSD_version < 800054
#if defined(__i386__) || defined(__amd64__)
#define mb() __asm volatile("mfence" ::: "memory")
#define wmb() __asm volatile("sfence" ::: "memory")
@@ -550,6 +552,7 @@ default: DBPRINT(sc, BCE_INSANE_PHY, \
#define rmb()
#define wmb()
#endif
+#endif
/****************************************************************************/
/* Device identification definitions. */
diff --git a/sys/dev/cxgb/cxgb_adapter.h b/sys/dev/cxgb/cxgb_adapter.h
index b54d340..a6099a0 100644
--- a/sys/dev/cxgb/cxgb_adapter.h
+++ b/sys/dev/cxgb/cxgb_adapter.h
@@ -42,6 +42,7 @@ $FreeBSD$
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/condvar.h>
+#include <sys/buf_ring.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -265,7 +266,7 @@ struct sge_txq {
* mbuf touches
*/
struct mbuf_head cleanq;
- struct buf_ring txq_mr;
+ struct buf_ring *txq_mr;
struct mbuf *immpkt;
uint32_t txq_drops;
uint32_t txq_skipped;
diff --git a/sys/dev/cxgb/cxgb_multiq.c b/sys/dev/cxgb/cxgb_multiq.c
index 045b094..dc667be 100644
--- a/sys/dev/cxgb/cxgb_multiq.c
+++ b/sys/dev/cxgb/cxgb_multiq.c
@@ -134,7 +134,7 @@ cxgb_pcpu_enqueue_packet_(struct sge_qset *qs, struct mbuf *m)
return (ENETDOWN);
}
txq = &qs->txq[TXQ_ETH];
- err = buf_ring_enqueue(&txq->txq_mr, m);
+ err = drbr_enqueue(qs->port->ifp, txq->txq_mr, m);
if (err) {
txq->txq_drops++;
m_freem(m);
@@ -199,14 +199,11 @@ cxgb_dequeue_packet(struct sge_txq *txq, struct mbuf **m_vec)
}
sc = qs->port->adapter;
- m = buf_ring_dequeue(&txq->txq_mr);
+ m = buf_ring_dequeue_sc(txq->txq_mr);
if (m == NULL)
return (0);
count = 1;
- KASSERT(m->m_type == MT_DATA,
- ("m=%p is bad mbuf type %d from ring cons=%d prod=%d", m,
- m->m_type, txq->txq_mr.br_cons, txq->txq_mr.br_prod));
m_vec[0] = m;
if (m->m_pkthdr.tso_segsz > 0 || m->m_pkthdr.len > TX_WR_SIZE_MAX ||
m->m_next != NULL || (cxgb_pcpu_tx_coalesce == 0)) {
@@ -214,14 +211,14 @@ cxgb_dequeue_packet(struct sge_txq *txq, struct mbuf **m_vec)
}
size = m->m_pkthdr.len;
- for (m = buf_ring_peek(&txq->txq_mr); m != NULL;
- m = buf_ring_peek(&txq->txq_mr)) {
+ for (m = buf_ring_peek(txq->txq_mr); m != NULL;
+ m = buf_ring_peek(txq->txq_mr)) {
if (m->m_pkthdr.tso_segsz > 0 ||
size + m->m_pkthdr.len > TX_WR_SIZE_MAX || m->m_next != NULL)
break;
- buf_ring_dequeue(&txq->txq_mr);
+ buf_ring_dequeue_sc(txq->txq_mr);
size += m->m_pkthdr.len;
m_vec[count++] = m;
@@ -372,7 +369,7 @@ cxgb_pcpu_free(struct sge_qset *qs)
mtx_lock(&txq->lock);
while ((m = mbufq_dequeue(&txq->sendq)) != NULL)
m_freem(m);
- while ((m = buf_ring_dequeue(&txq->txq_mr)) != NULL)
+ while ((m = buf_ring_dequeue_sc(txq->txq_mr)) != NULL)
m_freem(m);
t3_free_tx_desc_all(txq);
@@ -434,7 +431,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
initerr = ENETDOWN;
else if (immpkt) {
- if (!buf_ring_empty(&txq->txq_mr))
+ if (!buf_ring_empty(txq->txq_mr))
initerr = cxgb_pcpu_enqueue_packet_(qs, immpkt);
else
txq->immpkt = immpkt;
@@ -465,7 +462,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
}
stopped = isset(&qs->txq_stopped, TXQ_ETH);
- flush = (((!buf_ring_empty(&txq->txq_mr) || (!IFQ_DRV_IS_EMPTY(&pi->ifp->if_snd))) && !stopped) || txq->immpkt);
+ flush = (((!buf_ring_empty(txq->txq_mr) || (!IFQ_DRV_IS_EMPTY(&pi->ifp->if_snd))) && !stopped) || txq->immpkt);
max_desc = tx_flush ? TX_ETH_Q_SIZE : TX_START_MAX_DESC;
if (cxgb_debug)
@@ -476,7 +473,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
if ((tx_flush && flush && err == 0) &&
- (!buf_ring_empty(&txq->txq_mr) ||
+ (!buf_ring_empty(txq->txq_mr) ||
!IFQ_DRV_IS_EMPTY(&pi->ifp->if_snd))) {
struct thread *td = curthread;
@@ -526,7 +523,7 @@ cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt)
txq = &qs->txq[TXQ_ETH];
if (((sc->tunq_coalesce == 0) ||
- (buf_ring_count(&txq->txq_mr) >= TX_WR_COUNT_MAX) ||
+ (buf_ring_count(txq->txq_mr) >= TX_WR_COUNT_MAX) ||
(cxgb_pcpu_tx_coalesce == 0)) && mtx_trylock(&txq->lock)) {
if (cxgb_debug)
printf("doing immediate transmit\n");
@@ -534,12 +531,12 @@ cxgb_pcpu_start(struct ifnet *ifp, struct mbuf *immpkt)
txq->flags |= TXQ_TRANSMITTING;
err = cxgb_pcpu_start_(qs, immpkt, FALSE);
txq->flags &= ~TXQ_TRANSMITTING;
- resid = (buf_ring_count(&txq->txq_mr) > 64) || (desc_reclaimable(txq) > 64);
+ resid = (buf_ring_count(txq->txq_mr) > 64) || (desc_reclaimable(txq) > 64);
mtx_unlock(&txq->lock);
} else if (immpkt) {
if (cxgb_debug)
printf("deferred coalesce=%jx ring_count=%d mtx_owned=%d\n",
- sc->tunq_coalesce, buf_ring_count(&txq->txq_mr), mtx_owned(&txq->lock));
+ sc->tunq_coalesce, buf_ring_count(txq->txq_mr), mtx_owned(&txq->lock));
err = cxgb_pcpu_enqueue_packet_(qs, immpkt);
}
@@ -591,7 +588,7 @@ cxgb_pcpu_start_proc(void *arg)
if ((qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
idleticks = hz;
- if (!buf_ring_empty(&txq->txq_mr) ||
+ if (!buf_ring_empty(txq->txq_mr) ||
!mbufq_empty(&txq->sendq))
cxgb_pcpu_free(qs);
goto done;
@@ -616,11 +613,13 @@ cxgb_pcpu_start_proc(void *arg)
mtx_unlock(&qs->rspq.lock);
}
#endif
- if ((!buf_ring_empty(&txq->txq_mr)) && err == 0) {
+ if ((!buf_ring_empty(txq->txq_mr)) && err == 0) {
+#if 0
if (cxgb_debug)
printf("head=%p cons=%d prod=%d\n",
txq->sendq.head, txq->txq_mr.br_cons,
txq->txq_mr.br_prod);
+#endif
continue;
}
done:
diff --git a/sys/dev/cxgb/cxgb_osdep.h b/sys/dev/cxgb/cxgb_osdep.h
index 29b9e2f..4f42290 100644
--- a/sys/dev/cxgb/cxgb_osdep.h
+++ b/sys/dev/cxgb/cxgb_osdep.h
@@ -155,9 +155,6 @@ void cxgb_log_tcb(struct adapter *sc, unsigned int tid);
#if defined(__i386__) || defined(__amd64__)
-#define mb() __asm volatile("mfence":::"memory")
-#define rmb() __asm volatile("lfence":::"memory")
-#define wmb() __asm volatile("sfence" ::: "memory")
#define smp_mb() mb()
#define L1_CACHE_BYTES 128
@@ -178,163 +175,11 @@ extern void kdb_backtrace(void);
#else /* !i386 && !amd64 */
-#define mb()
-#define rmb()
-#define wmb()
#define smp_mb()
#define prefetch(x)
#define L1_CACHE_BYTES 32
#endif
-struct buf_ring {
- caddr_t *br_ring;
- volatile uint32_t br_cons;
- volatile uint32_t br_prod;
- int br_size;
- struct mtx br_lock;
-};
-
-struct buf_ring *buf_ring_alloc(int count, int flags);
-void buf_ring_free(struct buf_ring *);
-
-static __inline int
-buf_ring_count(struct buf_ring *mr)
-{
- int size = mr->br_size;
- uint32_t mask = size - 1;
-
- return ((size + mr->br_prod - mr->br_cons) & mask);
-}
-
-static __inline int
-buf_ring_empty(struct buf_ring *mr)
-{
- return (mr->br_cons == mr->br_prod);
-}
-
-static __inline int
-buf_ring_full(struct buf_ring *mr)
-{
- uint32_t mask;
-
- mask = mr->br_size - 1;
- return (mr->br_cons == ((mr->br_prod + 1) & mask));
-}
-
-/*
- * The producer and consumer are independently locked
- * this relies on the consumer providing his own serialization
- *
- */
-static __inline void *
-buf_ring_dequeue(struct buf_ring *mr)
-{
- uint32_t prod, cons, mask;
- caddr_t *ring, m;
-
- ring = (caddr_t *)mr->br_ring;
- mask = mr->br_size - 1;
- cons = mr->br_cons;
- mb();
- prod = mr->br_prod;
- m = NULL;
- if (cons != prod) {
- m = ring[cons];
- ring[cons] = NULL;
- mr->br_cons = (cons + 1) & mask;
- mb();
- }
- return (m);
-}
-
-#ifdef DEBUG_BUFRING
-static __inline void
-__buf_ring_scan(struct buf_ring *mr, void *m, char *file, int line)
-{
- int i;
-
- for (i = 0; i < mr->br_size; i++)
- if (m == mr->br_ring[i])
- panic("%s:%d m=%p present prod=%d cons=%d idx=%d", file,
- line, m, mr->br_prod, mr->br_cons, i);
-}
-
-static __inline void
-buf_ring_scan(struct buf_ring *mr, void *m, char *file, int line)
-{
- mtx_lock(&mr->br_lock);
- __buf_ring_scan(mr, m, file, line);
- mtx_unlock(&mr->br_lock);
-}
-
-#else
-static __inline void
-__buf_ring_scan(struct buf_ring *mr, void *m, char *file, int line)
-{
-}
-
-static __inline void
-buf_ring_scan(struct buf_ring *mr, void *m, char *file, int line)
-{
-}
-#endif
-
-static __inline int
-__buf_ring_enqueue(struct buf_ring *mr, void *m, char *file, int line)
-{
-
- uint32_t prod, cons, mask;
- int err;
-
- mask = mr->br_size - 1;
- prod = mr->br_prod;
- mb();
- cons = mr->br_cons;
- __buf_ring_scan(mr, m, file, line);
- if (((prod + 1) & mask) != cons) {
- KASSERT(mr->br_ring[prod] == NULL, ("overwriting entry"));
- mr->br_ring[prod] = m;
- mb();
- mr->br_prod = (prod + 1) & mask;
- err = 0;
- } else
- err = ENOBUFS;
-
- return (err);
-}
-
-static __inline int
-buf_ring_enqueue_(struct buf_ring *mr, void *m, char *file, int line)
-{
- int err;
-
- mtx_lock(&mr->br_lock);
- err = __buf_ring_enqueue(mr, m, file, line);
- mtx_unlock(&mr->br_lock);
-
- return (err);
-}
-
-#define buf_ring_enqueue(mr, m) buf_ring_enqueue_((mr), (m), __FILE__, __LINE__)
-
-
-static __inline void *
-buf_ring_peek(struct buf_ring *mr)
-{
- int prod, cons, mask;
- caddr_t *ring, m;
-
- ring = (caddr_t *)mr->br_ring;
- mask = mr->br_size - 1;
- cons = mr->br_cons;
- prod = mr->br_prod;
- m = NULL;
- if (cons != prod)
- m = ring[cons];
-
- return (m);
-}
-
#define DBG_RX (1 << 0)
static const int debug_flags = DBG_RX;
diff --git a/sys/dev/cxgb/cxgb_sge.c b/sys/dev/cxgb/cxgb_sge.c
index 26233d9..1a00d42 100644
--- a/sys/dev/cxgb/cxgb_sge.c
+++ b/sys/dev/cxgb/cxgb_sge.c
@@ -1716,10 +1716,8 @@ t3_free_qset(adapter_t *sc, struct sge_qset *q)
t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
for (i = 0; i < SGE_TXQ_PER_SET; i++)
- if (q->txq[i].txq_mr.br_ring != NULL) {
- free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
- mtx_destroy(&q->txq[i].txq_mr.br_lock);
- }
+ if (q->txq[i].txq_mr != NULL)
+ buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
if (q->fl[i].desc) {
mtx_lock_spin(&sc->sge.reg_lock);
@@ -1874,7 +1872,6 @@ t3_free_tx_desc(struct sge_txq *q, int reclaimable)
txsd->flags &= ~TX_SW_DESC_MAPPED;
}
m_freem_iovec(&txsd->mi);
- buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
txsd->mi.mi_base = NULL;
#if defined(DIAGNOSTIC) && 0
if (m_get_priority(txsd->m[0]) != cidx)
@@ -2272,14 +2269,12 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
int i, header_size, ret = 0;
for (i = 0; i < SGE_TXQ_PER_SET; i++) {
- if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
- M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
+
+ if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
+ M_DEVBUF, M_WAITOK, &q->txq[i].lock)) == NULL) {
device_printf(sc->dev, "failed to allocate mbuf ring\n");
goto err;
}
- q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
- q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
- mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
}
init_qset_cntxt(q, id);
@@ -3496,12 +3491,14 @@ t3_add_configured_sysctls(adapter_t *sc)
SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
0, "#tunneled packets waiting to be sent");
+#if 0
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
0, "#tunneled packets queue producer index");
SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
0, "#tunneled packets queue consumer index");
+#endif
SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
0, "#tunneled packets processed by the card");
diff --git a/sys/dev/cxgb/sys/cxgb_support.c b/sys/dev/cxgb/sys/cxgb_support.c
index e911dfc..18f799e 100644
--- a/sys/dev/cxgb/sys/cxgb_support.c
+++ b/sys/dev/cxgb/sys/cxgb_support.c
@@ -308,33 +308,3 @@ free:
uma_zfree(zone, vec[i]);
}
-struct buf_ring *
-buf_ring_alloc(int count, int flags)
-{
- struct buf_ring *br;
-
- KASSERT(powerof2(count), ("buf ring must be size power of 2"));
-
- br = malloc(sizeof(struct buf_ring), M_DEVBUF, flags|M_ZERO);
- if (br == NULL)
- return (NULL);
-
- br->br_ring = malloc(sizeof(caddr_t)*count, M_DEVBUF, flags|M_ZERO);
- if (br->br_ring == NULL) {
- free(br, M_DEVBUF);
- return (NULL);
- }
-
- mtx_init(&br->br_lock, "buf ring", NULL, MTX_DUPOK|MTX_DEF);
- br->br_size = count;
- br->br_prod = br->br_cons = 0;
-
- return (br);
-}
-
-void
-buf_ring_free(struct buf_ring *br)
-{
- free(br->br_ring, M_DEVBUF);
- free(br, M_DEVBUF);
-}
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
index 9d3618f..47d9d8e 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c
@@ -102,7 +102,7 @@ static int __cxio_init_resource_fifo(struct buf_ring **fifo,
u32 rarray[16];
mtx_init(fifo_lock, "cxio fifo", NULL, MTX_DEF|MTX_DUPOK);
- *fifo = buf_ring_alloc(nr, M_NOWAIT);
+ *fifo = buf_ring_alloc(nr, M_DEVBUF, M_NOWAIT, fifo_lock);
if (*fifo == NULL)
return (-ENOMEM);
#if 0
@@ -134,7 +134,7 @@ static int __cxio_init_resource_fifo(struct buf_ring **fifo,
buf_ring_enqueue(*fifo, (void *) (uintptr_t)i);
#if 0
for (i = 0; i < skip_low + skip_high; i++)
- buf_ring_dequeue(*fifo);
+ buf_ring_dequeue_sc(*fifo);
#endif
return 0;
}
@@ -161,7 +161,8 @@ static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p)
mtx_init(&rdev_p->rscp->qpid_fifo_lock, "qpid fifo", NULL, MTX_DEF);
- rdev_p->rscp->qpid_fifo = buf_ring_alloc(T3_MAX_NUM_QP, M_NOWAIT);
+ rdev_p->rscp->qpid_fifo = buf_ring_alloc(T3_MAX_NUM_QP, M_DEVBUF,
+ M_NOWAIT, &rdev_p->rscp->qpid_fifo_lock);
if (rdev_p->rscp->qpid_fifo == NULL)
return (-ENOMEM);
@@ -180,7 +181,7 @@ int cxio_hal_init_rhdl_resource(u32 nr_rhdl)
void cxio_hal_destroy_rhdl_resource(void)
{
- buf_ring_free(rhdl_fifo);
+ buf_ring_free(rhdl_fifo, M_DEVBUF);
}
#endif
@@ -214,11 +215,11 @@ int cxio_hal_init_resource(struct cxio_rdev *rdev_p,
goto pdid_err;
return 0;
pdid_err:
- buf_ring_free(rscp->cqid_fifo);
+ buf_ring_free(rscp->cqid_fifo, M_DEVBUF);
cqid_err:
- buf_ring_free(rscp->qpid_fifo);
+ buf_ring_free(rscp->qpid_fifo, M_DEVBUF);
qpid_err:
- buf_ring_free(rscp->tpt_fifo);
+ buf_ring_free(rscp->tpt_fifo, M_DEVBUF);
tpt_err:
return (-ENOMEM);
}
@@ -231,7 +232,7 @@ static u32 cxio_hal_get_resource(struct buf_ring *fifo, struct mtx *lock)
u32 entry;
mtx_lock(lock);
- entry = (u32)(uintptr_t)buf_ring_dequeue(fifo);
+ entry = (u32)(uintptr_t)buf_ring_dequeue_sc(fifo);
mtx_unlock(lock);
return entry;
}
@@ -288,10 +289,10 @@ void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid)
void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
{
- buf_ring_free(rscp->tpt_fifo);
- buf_ring_free(rscp->cqid_fifo);
- buf_ring_free(rscp->qpid_fifo);
- buf_ring_free(rscp->pdid_fifo);
+ buf_ring_free(rscp->tpt_fifo, M_DEVBUF);
+ buf_ring_free(rscp->cqid_fifo, M_DEVBUF);
+ buf_ring_free(rscp->qpid_fifo, M_DEVBUF);
+ buf_ring_free(rscp->pdid_fifo, M_DEVBUF);
free(rscp, M_DEVBUF);
}
diff --git a/sys/dev/mxge/if_mxge_var.h b/sys/dev/mxge/if_mxge_var.h
index e5d176d..a91b4d8 100644
--- a/sys/dev/mxge/if_mxge_var.h
+++ b/sys/dev/mxge/if_mxge_var.h
@@ -298,6 +298,8 @@ struct mxge_media_type
/* implement our own memory barriers, since bus_space_barrier
cannot handle write-combining regions */
+#if __FreeBSD_version < 800053
+
#if defined (__GNUC__)
#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__
#define mb() __asm__ __volatile__ ("sfence;": : :"memory")
@@ -312,6 +314,8 @@ struct mxge_media_type
#error "unknown compiler"
#endif
+#endif
+
static inline void
mxge_pio_copy(volatile void *to_v, void *from_v, size_t size)
{
diff --git a/sys/dev/nxge/xge-osdep.h b/sys/dev/nxge/xge-osdep.h
index 15adfe7..e8f4aba 100644
--- a/sys/dev/nxge/xge-osdep.h
+++ b/sys/dev/nxge/xge-osdep.h
@@ -242,8 +242,12 @@ typedef xge_pci_info_t *pci_cfg_h;
mtx_unlock_flags(lockp, flags); \
}
+#if __FreeBSD_version > 800053
/* Write memory barrier */
+#define xge_os_wmb() wmb()
+#else
#define xge_os_wmb()
+#endif
/* Delay (in micro seconds) */
#define xge_os_udelay(us) DELAY(us)
diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h
index 06216fb..bbf2655 100644
--- a/sys/i386/include/atomic.h
+++ b/sys/i386/include/atomic.h
@@ -32,6 +32,21 @@
#error this file needs sys/cdefs.h as a prerequisite
#endif
+
+#if defined(I686_CPU)
+#define mb() __asm__ __volatile__ ("mfence;": : :"memory")
+#define wmb() __asm__ __volatile__ ("sfence;": : :"memory")
+#define rmb() __asm__ __volatile__ ("lfence;": : :"memory")
+#else
+/*
+ * do we need a serializing instruction?
+ */
+#define mb()
+#define wmb()
+#define rmb()
+#endif
+
+
/*
* Various simple operations on memory, each of which is atomic in the
* presence of interrupts and multiple processors.
diff --git a/sys/ia64/include/atomic.h b/sys/ia64/include/atomic.h
index 631193f..fdfcb9e 100644
--- a/sys/ia64/include/atomic.h
+++ b/sys/ia64/include/atomic.h
@@ -29,6 +29,10 @@
#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_
+#define mb()
+#define wmb()
+#define rmb()
+
/*
* Various simple arithmetic on memory which is atomic in the presence
* of interrupts and SMP safe.
diff --git a/sys/kern/subr_bufring.c b/sys/kern/subr_bufring.c
new file mode 100644
index 0000000..63938ea
--- /dev/null
+++ b/sys/kern/subr_bufring.c
@@ -0,0 +1,68 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2007,2008 Kip Macy kmacy@freebsd.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. The name of Kip Macy nor the names of other
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ ***************************************************************************/
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/ktr.h>
+#include <sys/buf_ring.h>
+
+
+struct buf_ring *
+buf_ring_alloc(int count, struct malloc_type *type, int flags, struct mtx *lock)
+{
+ struct buf_ring *br;
+
+ KASSERT(powerof2(count), ("buf ring must be size power of 2"));
+
+ br = malloc(sizeof(struct buf_ring) + count*sizeof(caddr_t),
+ type, flags|M_ZERO);
+ if (br == NULL)
+ return (NULL);
+#ifdef DEBUG_BUFRING
+ br->br_lock = lock;
+#endif
+ br->br_prod_size = br->br_cons_size = count;
+ br->br_prod_mask = br->br_cons_mask = count-1;
+ br->br_prod_head = br->br_cons_head = 0;
+ br->br_prod_tail = br->br_cons_tail = 0;
+
+ return (br);
+}
+
+void
+buf_ring_free(struct buf_ring *br, struct malloc_type *type)
+{
+ free(br, type);
+}
diff --git a/sys/net/if.c b/sys/net/if.c
index 77d4ec3..358d8f2 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -119,10 +119,10 @@ static void if_freemulti(struct ifmultiaddr *);
static void if_grow(void);
static void if_init(void *);
static void if_check(void *);
-static void if_qflush(struct ifaltq *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static void if_slowtimo(void *);
+static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int if_rtdel(struct radix_node *, void *);
@@ -507,6 +507,28 @@ if_free_type(struct ifnet *ifp, u_char type)
free(ifp, M_IFNET);
};
+void
+ifq_attach(struct ifaltq *ifq, struct ifnet *ifp)
+{
+
+ mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
+
+ if (ifq->ifq_maxlen == 0)
+ ifq->ifq_maxlen = ifqmaxlen;
+
+ ifq->altq_type = 0;
+ ifq->altq_disc = NULL;
+ ifq->altq_flags &= ALTQF_CANTCHANGE;
+ ifq->altq_tbr = NULL;
+ ifq->altq_ifp = ifp;
+}
+
+void
+ifq_detach(struct ifaltq *ifq)
+{
+ mtx_destroy(&ifq->ifq_mtx);
+}
+
/*
* Perform generic interface initalization tasks and attach the interface
* to the list of "active" interfaces.
@@ -547,7 +569,15 @@ if_attach(struct ifnet *ifp)
getmicrotime(&ifp->if_lastchange);
ifp->if_data.ifi_epoch = time_uptime;
ifp->if_data.ifi_datalen = sizeof(struct if_data);
+ KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
+ (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
+ ("transmit and qflush must both either be set or both be NULL"));
+ if (ifp->if_transmit == NULL) {
+ ifp->if_transmit = if_transmit;
+ ifp->if_qflush = if_qflush;
+ }
+
#ifdef MAC
mac_init_ifnet(ifp);
mac_create_ifnet(ifp);
@@ -559,7 +589,7 @@ if_attach(struct ifnet *ifp)
make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
net_cdevsw.d_name, ifp->if_index);
- mtx_init(&ifp->if_snd.ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);
+ ifq_attach(&ifp->if_snd, ifp);
/*
* create a Link Level name for this device
@@ -596,11 +626,15 @@ if_attach(struct ifnet *ifp)
ifa->ifa_refcnt = 1;
TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */
-ifp->if_snd.altq_type = 0;
-ifp->if_snd.altq_disc = NULL;
-ifp->if_snd.altq_flags &= ALTQF_CANTCHANGE;
-ifp->if_snd.altq_tbr = NULL;
-ifp->if_snd.altq_ifp = ifp;
+
IFNET_WLOCK();
TAILQ_INSERT_TAIL(&ifnet, ifp, if_link);
@@ -849,7 +883,7 @@ if_detach(struct ifnet *ifp)
KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
knlist_clear(&ifp->if_klist, 0);
knlist_destroy(&ifp->if_klist);
- mtx_destroy(&ifp->if_snd.ifq_mtx);
+ ifq_detach(&ifp->if_snd);
IF_AFDATA_DESTROY(ifp);
splx(s);
}
@@ -1440,7 +1474,8 @@ if_unroute(struct ifnet *ifp, int flag, int fam)
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
- if_qflush(&ifp->if_snd);
+ ifp->if_qflush(ifp);
+
#ifdef DEV_CARP
if (ifp->if_carp)
carp_carpdev_state(ifp->if_carp);
@@ -1567,11 +1602,13 @@ if_up(struct ifnet *ifp)
/*
* Flush an interface queue.
*/
-static void
-if_qflush(struct ifaltq *ifq)
+void
+if_qflush(struct ifnet *ifp)
{
struct mbuf *m, *n;
-
+ struct ifaltq *ifq;
+
+ ifq = &ifp->if_snd;
IFQ_LOCK(ifq);
#ifdef ALTQ
if (ALTQ_IS_ENABLED(ifq))
@@ -2862,6 +2899,19 @@ if_start_deferred(void *context, int pending)
(ifp->if_start)(ifp);
}
+/*
+ * Backwards compatibility interface for drivers
+ * that have not implemented it
+ */
+static int
+if_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+ int error;
+
+ IFQ_HANDOFF(ifp, m, error);
+ return (error);
+}
+
int
if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
{
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index fa43ae0..24acac8 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -383,7 +383,6 @@ bad: if (m != NULL)
int
ether_output_frame(struct ifnet *ifp, struct mbuf *m)
{
- int error;
#if defined(INET) || defined(INET6)
struct ip_fw *rule = ip_dn_claim_rule(m);
@@ -402,8 +401,7 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m)
* Queue message on interface, update output statistics if
* successful, and start output if interface not yet active.
*/
- IFQ_HANDOFF(ifp, m, error);
- return (error);
+ return ((ifp->if_transmit)(ifp, m));
}
#if defined(INET) || defined(INET6)
diff --git a/sys/net/if_fddisubr.c b/sys/net/if_fddisubr.c
index a2fd00b..61f8f56 100644
--- a/sys/net/if_fddisubr.c
+++ b/sys/net/if_fddisubr.c
@@ -336,7 +336,7 @@ fddi_output(ifp, m, dst, rt0)
}
}
- IFQ_HANDOFF(ifp, m, error);
+ error = (ifp->if_transmit)(ifp, m);
if (error)
ifp->if_oerrors++;
diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c
index b1c68d2..d53b318 100644
--- a/sys/net/if_fwsubr.c
+++ b/sys/net/if_fwsubr.c
@@ -249,7 +249,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
*/
enc->ul[0] = htonl(enc->ul[0]);
- IFQ_HANDOFF(ifp, m, error);
+ error = (ifp->if_transmit)(ifp, m);
return (error);
} else {
/*
@@ -309,7 +309,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
enc->ul[0] = htonl(enc->ul[0]);
enc->ul[1] = htonl(enc->ul[1]);
- IFQ_HANDOFF(ifp, m, error);
+ error = (ifp->if_transmit)(ifp, m);
if (error) {
if (mtail)
m_freem(mtail);
diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c
index 42ca7c6..be0cf05 100644
--- a/sys/net/if_lagg.c
+++ b/sys/net/if_lagg.c
@@ -1374,12 +1374,8 @@ out:
int
lagg_enqueue(struct ifnet *ifp, struct mbuf *m)
{
- int error = 0;
- IFQ_HANDOFF(ifp, m, error);
- if (error)
- ifp->if_oerrors++;
- return (error);
+ return (ifp->if_transmit)(ifp, m);
}
/*
diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c
index 0852ad5..82723f1 100644
--- a/sys/net/if_tun.c
+++ b/sys/net/if_tun.c
@@ -647,7 +647,7 @@ tunoutput(
}
}
- IFQ_HANDOFF(ifp, m0, error);
+ error = (ifp->if_transmit)(ifp, m0);
if (error) {
ifp->if_collisions++;
return (ENOBUFS);
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 92c8e80..d4f3823 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -77,6 +77,7 @@ struct ifvlantrunk;
#ifdef _KERNEL
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
+#include <sys/buf_ring.h>
#endif /* _KERNEL */
#include <sys/lock.h> /* XXX */
#include <sys/mutex.h> /* XXX */
@@ -186,7 +187,11 @@ struct ifnet {
/* protected by if_addr_mtx */
void *if_pf_kif;
void *if_lagg; /* lagg glue */
- void *if_pspare[10]; /* multiq/TOE 3; vimage 3; general use 4 */
+ void *if_pspare[8]; /* multiq/TOE 3; vimage 3; general use 4 */
+ void (*if_qflush) /* flush any queues */
+ (struct ifnet *);
+ int (*if_transmit) /* initiate output routine */
+ (struct ifnet *, struct mbuf *);
int if_ispare[2]; /* general use 2 */
};
@@ -536,6 +541,119 @@ do { \
IFQ_PURGE(ifq); \
} while (0)
+#ifdef _KERNEL
+static __inline void
+drbr_stats_update(struct ifnet *ifp, int len, int mflags)
+{
+#ifndef NO_SLOW_STATS
+ ifp->if_obytes += len;
+ if (mflags & M_MCAST)
+ ifp->if_omcasts++;
+#endif
+}
+
+static __inline int
+drbr_enqueue(struct ifnet *ifp, struct buf_ring *br, struct mbuf *m)
+{
+ int error = 0;
+ int len = m->m_pkthdr.len;
+ int mflags = m->m_flags;
+
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_ENQUEUE(&ifp->if_snd, m, error);
+ return (error);
+ }
+#endif
+ if ((error = buf_ring_enqueue_bytes(br, m, len)) == ENOBUFS) {
+ br->br_drops++;
+ m_freem(m);
+ } else
+ drbr_stats_update(ifp, len, mflags);
+
+ return (error);
+}
+
+static __inline void
+drbr_flush(struct ifnet *ifp, struct buf_ring *br)
+{
+ struct mbuf *m;
+
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
+ IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+ m_freem(m);
+ }
+ }
+#endif
+ while ((m = buf_ring_dequeue_sc(br)) != NULL)
+ m_freem(m);
+}
+
+static __inline void
+drbr_free(struct buf_ring *br, struct malloc_type *type)
+{
+
+ drbr_flush(NULL, br);
+ buf_ring_free(br, type);
+}
+
+static __inline struct mbuf *
+drbr_dequeue(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return (buf_ring_dequeue_sc(br));
+}
+
+static __inline struct mbuf *
+drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
+ int (*func) (struct mbuf *, void *), void *arg)
+{
+ struct mbuf *m;
+#ifdef ALTQ
+ /*
+ * XXX need to evaluate / requeue
+ */
+ if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ m = buf_ring_peek(br);
+ if (m == NULL || func(m, arg) == 0)
+ return (NULL);
+
+ return (buf_ring_dequeue_sc(br));
+}
+
+static __inline int
+drbr_empty(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (IFQ_DRV_IS_EMPTY(&ifp->if_snd));
+#endif
+ return (buf_ring_empty(br));
+}
+
+static __inline int
+drbr_inuse(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ if (ALTQ_IS_ENABLED(&ifp->if_snd))
+ return (ifp->if_snd.ifq_len);
+#endif
+ return (buf_ring_count(br));
+}
+#endif
/*
* 72 was chosen below because it is the size of a TCP/IP
* header (40) + the minimum mss (32).
@@ -677,6 +795,7 @@ void if_free_type(struct ifnet *, u_char);
void if_initname(struct ifnet *, const char *, int);
void if_link_state_change(struct ifnet *, int);
int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
+void if_qflush(struct ifnet *);
int if_setlladdr(struct ifnet *, const u_char *, int);
void if_up(struct ifnet *);
/*void ifinit(void);*/ /* declared in systm.h for main() */
@@ -684,6 +803,9 @@ int ifioctl(struct socket *, u_long, caddr_t, struct thread *);
int ifpromisc(struct ifnet *, int);
struct ifnet *ifunit(const char *);
+void ifq_attach(struct ifaltq *, struct ifnet *ifp);
+void ifq_detach(struct ifaltq *);
+
struct ifaddr *ifa_ifwithaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c
index c1881da..b1f7dee 100644
--- a/sys/net/if_vlan.c
+++ b/sys/net/if_vlan.c
@@ -871,7 +871,7 @@ vlan_start(struct ifnet *ifp)
* Send it, precisely as ether_output() would have.
* We are already running at splimp.
*/
- IFQ_HANDOFF(p, m, error);
+ error = (p->if_transmit)(p, m);
if (!error)
ifp->if_opackets++;
else
diff --git a/sys/net80211/ieee80211_output.c b/sys/net80211/ieee80211_output.c
index c82de48..ee96325 100644
--- a/sys/net80211/ieee80211_output.c
+++ b/sys/net80211/ieee80211_output.c
@@ -233,7 +233,22 @@ ieee80211_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
if_start(ifp);
ifp->if_opackets++;
+#error "r185164 cherry-pick conflicted here, please fix."
+<<<<<<< HEAD
return 0;
+=======
+ /* XXX defer if_start calls? */
+ error = (parent->if_transmit)(parent, m);
+ if (error != 0) {
+ /* NB: IFQ_HANDOFF reclaims mbuf */
+ ieee80211_free_node(ni);
+ } else {
+ ifp->if_opackets++;
+ }
+ ic->ic_lastdata = ticks;
+ }
+#undef IS_DWDS
+>>>>>>> eee4f1f... convert calls to IFQ_HANDOFF to if_transmit
}
/*
diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c
index 5b76c29..6358d89 100644
--- a/sys/netgraph/ng_iface.c
+++ b/sys/netgraph/ng_iface.c
@@ -401,7 +401,7 @@ ng_iface_output(struct ifnet *ifp, struct mbuf *m,
return (ENOBUFS);
}
*(sa_family_t *)m->m_data = dst->sa_family;
- IFQ_HANDOFF(ifp, m, error);
+ error = (ifp->if_transmit)(ifp, m);
} else
error = ng_iface_send(ifp, m, dst->sa_family);
diff --git a/sys/powerpc/include/atomic.h b/sys/powerpc/include/atomic.h
index d515a6a..4ac9f0c 100644
--- a/sys/powerpc/include/atomic.h
+++ b/sys/powerpc/include/atomic.h
@@ -39,6 +39,10 @@
#define __ATOMIC_BARRIER \
__asm __volatile("sync" : : : "memory")
+#define mb() __ATOMIC_BARRIER
+#define wmb() mb()
+#define rmb() mb()
+
/*
* atomic_add(p, v)
* { *p += v; }
diff --git a/sys/sparc64/include/atomic.h b/sys/sparc64/include/atomic.h
index fe36791..d663fbc 100644
--- a/sys/sparc64/include/atomic.h
+++ b/sys/sparc64/include/atomic.h
@@ -40,6 +40,10 @@
#define __ASI_ATOMIC ASI_P
#endif
+#define mb() __asm__ __volatile__ ("membar #MemIssue": : :"memory")
+#define wmb() mb()
+#define rmb() mb()
+
/*
* Various simple arithmetic on memory which is atomic in the presence
* of interrupts and multiple processors. See atomic(9) for details.
diff --git a/sys/sun4v/include/atomic.h b/sys/sun4v/include/atomic.h
index fe36791..c5005fa 100644
--- a/sys/sun4v/include/atomic.h
+++ b/sys/sun4v/include/atomic.h
@@ -33,6 +33,10 @@
#include <machine/cpufunc.h>
+#define mb() __asm__ __volatile__ ("membar #MemIssue": : :"memory")
+#define wmb() mb()
+#define rmb() mb()
+
/* Userland needs different ASI's. */
#ifdef _KERNEL
#define __ASI_ATOMIC ASI_N
diff --git a/sys/sys/buf_ring.h b/sys/sys/buf_ring.h
new file mode 100644
index 0000000..efa667d
--- /dev/null
+++ b/sys/sys/buf_ring.h
@@ -0,0 +1,279 @@
+/**************************************************************************
+ *
+ * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. The name of Kip Macy nor the names of other
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ ***************************************************************************/
+
+#ifndef _SYS_BUF_RING_H_
+#define _SYS_BUF_RING_H_
+
+#include <machine/cpu.h>
+
+#if defined(INVARIANTS) && !defined(DEBUG_BUFRING)
+#define DEBUG_BUFRING 1
+#endif
+
+#ifdef DEBUG_BUFRING
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#endif
+
+struct buf_ring {
+ volatile uint32_t br_prod_head;
+ volatile uint32_t br_prod_tail;
+ int br_prod_size;
+ int br_prod_mask;
+ uint64_t br_drops;
+ uint64_t br_prod_bufs;
+ uint64_t br_prod_bytes;
+ /*
+ * Pad out to next L2 cache line
+ */
+ uint64_t _pad0[11];
+
+ volatile uint32_t br_cons_head;
+ volatile uint32_t br_cons_tail;
+ int br_cons_size;
+ int br_cons_mask;
+
+ /*
+ * Pad out to next L2 cache line
+ */
+ uint64_t _pad1[14];
+#ifdef DEBUG_BUFRING
+ struct mtx *br_lock;
+#endif
+ void *br_ring[0];
+};
+
+/*
+ * multi-producer safe lock-free ring buffer enqueue
+ *
+ */
+static __inline int
+buf_ring_enqueue_bytes(struct buf_ring *br, void *buf, int nbytes)
+{
+ uint32_t prod_head, prod_next;
+ uint32_t cons_tail;
+ int success;
+#ifdef DEBUG_BUFRING
+ int i;
+ for (i = br->br_cons_head; i != br->br_prod_head;
+ i = ((i + 1) & br->br_cons_mask))
+ if(br->br_ring[i] == buf)
+ panic("buf=%p already enqueue at %d prod=%d cons=%d",
+ buf, i, br->br_prod_tail, br->br_cons_tail);
+#endif
+ critical_enter();
+ do {
+ prod_head = br->br_prod_head;
+ cons_tail = br->br_cons_tail;
+
+ prod_next = (prod_head + 1) & br->br_prod_mask;
+
+ if (prod_next == cons_tail) {
+ critical_exit();
+ return (ENOBUFS);
+ }
+
+ success = atomic_cmpset_int(&br->br_prod_head, prod_head,
+ prod_next);
+ } while (success == 0);
+#ifdef DEBUG_BUFRING
+ if (br->br_ring[prod_head] != NULL)
+ panic("dangling value in enqueue");
+#endif
+ br->br_ring[prod_head] = buf;
+ wmb();
+
+	/*
+	 * If there are other enqueues in progress
+	 * that preceded us, we need to wait for them
+	 * to complete
+	 */
+ while (br->br_prod_tail != prod_head)
+ cpu_spinwait();
+ br->br_prod_bufs++;
+ br->br_prod_bytes += nbytes;
+ br->br_prod_tail = prod_next;
+ critical_exit();
+ return (0);
+}
+
+static __inline int
+buf_ring_enqueue(struct buf_ring *br, void *buf)
+{
+
+ return (buf_ring_enqueue_bytes(br, buf, 0));
+}
+
+/*
+ * multi-consumer safe dequeue
+ *
+ */
+static __inline void *
+buf_ring_dequeue_mc(struct buf_ring *br)
+{
+ uint32_t cons_head, cons_next;
+ uint32_t prod_tail;
+ void *buf;
+ int success;
+
+ critical_enter();
+ do {
+ cons_head = br->br_cons_head;
+ prod_tail = br->br_prod_tail;
+
+ cons_next = (cons_head + 1) & br->br_cons_mask;
+
+ if (cons_head == prod_tail) {
+ critical_exit();
+ return (NULL);
+ }
+
+ success = atomic_cmpset_int(&br->br_cons_head, cons_head,
+ cons_next);
+ } while (success == 0);
+
+ buf = br->br_ring[cons_head];
+#ifdef DEBUG_BUFRING
+ br->br_ring[cons_head] = NULL;
+#endif
+ rmb();
+
+	/*
+	 * If there are other dequeues in progress
+	 * that preceded us, we need to wait for them
+	 * to complete
+	 */
+ while (br->br_cons_tail != cons_head)
+ cpu_spinwait();
+
+ br->br_cons_tail = cons_next;
+ critical_exit();
+
+ return (buf);
+}
+
+/*
+ * single-consumer dequeue
+ * use where dequeue is protected by a lock
+ * e.g. a network driver's tx queue lock
+ */
+static __inline void *
+buf_ring_dequeue_sc(struct buf_ring *br)
+{
+ uint32_t cons_head, cons_next, cons_next_next;
+ uint32_t prod_tail;
+ void *buf;
+
+ cons_head = br->br_cons_head;
+ prod_tail = br->br_prod_tail;
+
+ cons_next = (cons_head + 1) & br->br_cons_mask;
+ cons_next_next = (cons_head + 2) & br->br_cons_mask;
+
+ if (cons_head == prod_tail)
+ return (NULL);
+
+#ifdef PREFETCH_DEFINED
+ if (cons_next != prod_tail) {
+ prefetch(br->br_ring[cons_next]);
+ if (cons_next_next != prod_tail)
+ prefetch(br->br_ring[cons_next_next]);
+ }
+#endif
+ br->br_cons_head = cons_next;
+ buf = br->br_ring[cons_head];
+
+#ifdef DEBUG_BUFRING
+ br->br_ring[cons_head] = NULL;
+ if (!mtx_owned(br->br_lock))
+ panic("lock not held on single consumer dequeue");
+ if (br->br_cons_tail != cons_head)
+ panic("inconsistent list cons_tail=%d cons_head=%d",
+ br->br_cons_tail, cons_head);
+#endif
+ br->br_cons_tail = cons_next;
+ return (buf);
+}
+
+/*
+ * return a pointer to the first entry in the ring
+ * without modifying it, or NULL if the ring is empty
+ * race-prone if not protected by a lock
+ */
+static __inline void *
+buf_ring_peek(struct buf_ring *br)
+{
+
+#ifdef DEBUG_BUFRING
+ if ((br->br_lock != NULL) && !mtx_owned(br->br_lock))
+ panic("lock not held on single consumer dequeue");
+#endif
+	/*
+	 * I believe it is safe to not have a memory barrier
+	 * here because we control cons, and tail is at worst
+	 * a lagging indicator; so in the worst case we might
+	 * return NULL immediately after a buffer has been enqueued
+	 */
+ if (br->br_cons_head == br->br_prod_tail)
+ return (NULL);
+
+ return (br->br_ring[br->br_cons_head]);
+}
+
+static __inline int
+buf_ring_full(struct buf_ring *br)
+{
+
+ return (((br->br_prod_head + 1) & br->br_prod_mask) == br->br_cons_tail);
+}
+
+static __inline int
+buf_ring_empty(struct buf_ring *br)
+{
+
+ return (br->br_cons_head == br->br_prod_tail);
+}
+
+static __inline int
+buf_ring_count(struct buf_ring *br)
+{
+
+ return ((br->br_prod_size + br->br_prod_tail - br->br_cons_tail)
+ & br->br_prod_mask);
+}
+
+struct buf_ring *buf_ring_alloc(int count, struct malloc_type *type, int flags,
+ struct mtx *);
+void buf_ring_free(struct buf_ring *br, struct malloc_type *type);
+
+
+
+#endif
[-- Attachment #3 --]
From 6ef23c48a71766641f47716a9e3dc275c2315a2e Mon Sep 17 00:00:00 2001
From: Arnaud Lacombe <lacombar@gmail.com>
Date: Mon, 12 Sep 2011 22:32:45 -0400
Subject: [PATCH] IGB_MULTIQUEUE
---
sys/dev/e1000/if_igb.c | 34 ++++++++++++++++++----------------
sys/dev/e1000/if_igb.h | 4 ++--
2 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c
index 4944e56..de1fae9 100644
--- a/sys/dev/e1000/if_igb.c
+++ b/sys/dev/e1000/if_igb.c
@@ -42,7 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
@@ -172,7 +172,7 @@ static int igb_detach(device_t);
static int igb_shutdown(device_t);
static int igb_suspend(device_t);
static int igb_resume(device_t);
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
static int igb_mq_start(struct ifnet *, struct mbuf *);
static int igb_mq_start_locked(struct ifnet *,
struct tx_ring *, struct mbuf *);
@@ -800,7 +800,7 @@ igb_resume(device_t dev)
{
struct adapter *adapter = device_get_softc(dev);
struct ifnet *ifp = adapter->ifp;
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
struct tx_ring *txr = adapter->tx_rings;
#endif
@@ -810,7 +810,7 @@ igb_resume(device_t dev)
if ((ifp->if_flags & IFF_UP) &&
(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
-#if __FreeBSD_version < 800000
+#ifndef IGB_MULTIQUEUE
igb_start(ifp);
#else
for (int i = 0; i < adapter->num_queues; i++, txr++) {
@@ -828,7 +828,7 @@ igb_resume(device_t dev)
}
-#if __FreeBSD_version < 800000
+#ifndef IGB_MULTIQUEUE
/*********************************************************************
* Transmit entry point
*
@@ -905,7 +905,7 @@ igb_start(struct ifnet *ifp)
return;
}
-#else /* __FreeBSD_version >= 800000 */
+#else /* defined(IGB_MULTIQUEUE) */
/*
** Multiqueue Transmit driver
**
@@ -918,9 +918,11 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m)
struct tx_ring *txr;
int i = 0, err = 0;
+#if __FreeBSD_version >= 800000
/* Which queue to use */
if ((m->m_flags & M_FLOWID) != 0)
i = m->m_pkthdr.flowid % adapter->num_queues;
+#endif
txr = &adapter->tx_rings[i];
que = &adapter->queues[i];
@@ -1024,7 +1026,7 @@ igb_qflush(struct ifnet *ifp)
}
if_qflush(ifp);
}
-#endif /* __FreeBSD_version < 800000 */
+#endif /* !defined(IGB_MULTIQUEUE) */
/*********************************************************************
* Ioctl entry point
@@ -1358,7 +1360,7 @@ igb_handle_que(void *context, int pending)
IGB_TX_LOCK(txr);
if (igb_txeof(txr))
more = TRUE;
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
igb_mq_start_locked(ifp, txr, NULL);
#else
@@ -1482,7 +1484,7 @@ igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
do {
more = igb_txeof(txr);
} while (loop-- && more);
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
if (!drbr_empty(ifp, txr->br))
igb_mq_start_locked(ifp, txr, NULL);
#else
@@ -2235,7 +2237,7 @@ igb_allocate_legacy(struct adapter *adapter)
{
device_t dev = adapter->dev;
struct igb_queue *que = adapter->queues;
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
struct tx_ring *txr = adapter->tx_rings;
#endif
int error, rid = 0;
@@ -2256,7 +2258,7 @@ igb_allocate_legacy(struct adapter *adapter)
return (ENXIO);
}
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
#endif
@@ -2330,7 +2332,7 @@ igb_allocate_msix(struct adapter *adapter)
*/
if (adapter->num_queues > 1)
bus_bind_intr(dev, que->res, i);
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
que->txr);
#endif
@@ -2832,7 +2834,7 @@ igb_setup_interface(device_t dev, struct adapter *adapter)
ifp->if_softc = adapter;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_ioctl = igb_ioctl;
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
ifp->if_transmit = igb_mq_start;
ifp->if_qflush = igb_qflush;
#else
@@ -3076,7 +3078,7 @@ igb_allocate_queues(struct adapter *adapter)
error = ENOMEM;
goto err_tx_desc;
}
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
/* Allocate a buf ring */
txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
M_WAITOK, &txr->tx_mtx);
@@ -3137,7 +3139,7 @@ err_tx_desc:
igb_dma_free(adapter, &txr->txdma);
free(adapter->rx_rings, M_DEVBUF);
rx_fail:
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
buf_ring_free(txr->br, M_DEVBUF);
#endif
free(adapter->tx_rings, M_DEVBUF);
@@ -3381,7 +3383,7 @@ igb_free_transmit_buffers(struct tx_ring *txr)
tx_buffer->map = NULL;
}
}
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
if (txr->br != NULL)
buf_ring_free(txr->br, M_DEVBUF);
#endif
diff --git a/sys/dev/e1000/if_igb.h b/sys/dev/e1000/if_igb.h
index 80abf6e..85dbcef 100644
--- a/sys/dev/e1000/if_igb.h
+++ b/sys/dev/e1000/if_igb.h
@@ -293,7 +293,7 @@ struct tx_ring {
u32 next_to_clean;
volatile u16 tx_avail;
struct igb_tx_buffer *tx_buffers;
-#if __FreeBSD_version >= 800000
+#ifdef IGB_MULTIQUEUE
struct buf_ring *br;
#endif
bus_dma_tag_t txtag;
@@ -527,7 +527,7 @@ igb_rx_unrefreshed(struct rx_ring *rxr)
cur |= new; \
}
-#if __FreeBSD_version >= 800000 && __FreeBSD_version < 800504
+#if defined(IGB_MULTIQUEUE) && __FreeBSD_version < 800504
static __inline int
drbr_needs_enqueue(struct ifnet *ifp, struct buf_ring *br)
{
--
1.7.6.153.g78432
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CACqU3MV7JRxQ_mNeHCk7RVyzETZLAcc3XL=xyZ-qqtPfRxkZeQ>
