Date: Tue, 5 Feb 2013 12:44:01 -0500 From: Randall Stewart <rrs@lakerest.net> To: John Baldwin <jhb@freebsd.org> Cc: freebsd-net@freebsd.org, Robert Watson <rwatson@freebsd.org>, Jack Vogel <jfvogel@gmail.com>, Kip Macy <kmacy@freebsd.org> Subject: Re: Driver patch to look at... Message-ID: <0D421326-9A80-4E21-A18E-E717F5C02164@lakerest.net> In-Reply-To: <201302051213.51401.jhb@freebsd.org> References: <D3AA078A-CD19-4228-A019-BE9C985895E2@lakerest.net> <39571D84-A8C0-46A4-8EFA-CF74D862EAAE@lakerest.net> <B02D5035-650B-4958-85E9-B6D984E720D5@lakerest.net> <201302051213.51401.jhb@freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
Actually, no it is used.
If you look in if_var.h int he drbr_putback() function, it does
a buf_ring_swap when the old mbuf pointer does not equal the
new mbuf pointer. This *does* happen, I crashed at least once
yesterday when the igb driver did something to free the original
mbuf and return a new mbuf with the data (prepend or some such).
I also have found several issues that I have fixed this morning.. its been
crash city on my test beds..
Here is the latest patch with all fixes and suggested changes from emaste (thanks Ed)
R
[-- Attachment #2 --]
Index: dev/e1000/if_em.c
===================================================================
--- dev/e1000/if_em.c (revision 246357)
+++ dev/e1000/if_em.c (working copy)
@@ -894,7 +894,7 @@ static int
em_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
struct adapter *adapter = txr->adapter;
- struct mbuf *next;
+ struct mbuf *next, *snext;
int err = 0, enq = 0;
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
@@ -905,22 +905,25 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ri
}
enq = 0;
- if (m == NULL) {
- next = drbr_dequeue(ifp, txr->br);
- } else if (drbr_needs_enqueue(ifp, txr->br)) {
- if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
+ if (m != NULL) {
+ err = drbr_enqueue(ifp, txr->br, m);
+ if (err) {
return (err);
- next = drbr_dequeue(ifp, txr->br);
- } else
- next = m;
+ }
+ }
/* Process the queue */
- while (next != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
+ snext = next;
if ((err = em_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
- break;
+ if (next == NULL) {
+ drbr_advance(ifp, txr->br);
+ } else {
+ drbr_putback(ifp, txr->br, next, snext);
+ }
+ break;
}
+ drbr_advance(ifp, txr->br);
enq++;
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
@@ -928,7 +931,6 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ri
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- next = drbr_dequeue(ifp, txr->br);
}
if (enq > 0) {
Index: dev/e1000/if_igb.c
===================================================================
--- dev/e1000/if_igb.c (revision 246357)
+++ dev/e1000/if_igb.c (working copy)
@@ -965,12 +965,13 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m)
** out-of-order delivery, but
** settle for it if that fails
*/
- if (m)
+ if (m != NULL)
drbr_enqueue(ifp, txr->br, m);
err = igb_mq_start_locked(ifp, txr);
IGB_TX_UNLOCK(txr);
} else {
- err = drbr_enqueue(ifp, txr->br, m);
+ if (m != NULL)
+ err = drbr_enqueue(ifp, txr->br, m);
taskqueue_enqueue(que->tq, &txr->txq_task);
}
@@ -981,7 +982,7 @@ static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
struct adapter *adapter = txr->adapter;
- struct mbuf *next;
+ struct mbuf *next, *snext;
int err = 0, enq;
IGB_TX_LOCK_ASSERT(txr);
@@ -994,12 +995,23 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_r
enq = 0;
/* Process the queue */
- while ((next = drbr_dequeue(ifp, txr->br)) != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
+ snext = next;
if ((err = igb_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
+ if (next == NULL) {
+ /* It was freed, move forward */
+ drbr_advance(ifp, txr->br);
+ } else {
+ /*
+ * Still have one left, it may not be
+ * the same since the transmit function
+ * may have changed it.
+ */
+ drbr_putback(ifp, txr->br, next, snext);
+ }
break;
}
+ drbr_advance(ifp, txr->br);
enq++;
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
Index: dev/oce/oce_if.c
===================================================================
--- dev/oce/oce_if.c (revision 246357)
+++ dev/oce/oce_if.c (working copy)
@@ -1154,6 +1154,7 @@ oce_multiq_transmit(struct ifnet *ifp, struct mbuf
POCE_SOFTC sc = ifp->if_softc;
int status = 0, queue_index = 0;
struct mbuf *next = NULL;
+ struct mbuf *snext;
struct buf_ring *br = NULL;
br = wq->br;
@@ -1166,29 +1167,28 @@ oce_multiq_transmit(struct ifnet *ifp, struct mbuf
return status;
}
- if (m == NULL)
- next = drbr_dequeue(ifp, br);
- else if (drbr_needs_enqueue(ifp, br)) {
+ if (m != NULL) {
if ((status = drbr_enqueue(ifp, br, m)) != 0)
return status;
- next = drbr_dequeue(ifp, br);
- } else
- next = m;
-
- while (next != NULL) {
+ }
+ while ((next = drbr_peek(ifp, br)) != NULL) {
+ snext = next;
if (oce_tx(sc, &next, queue_index)) {
- if (next != NULL) {
+ if (next == NULL) {
+ drbr_advance(ifp, br);
+ } else {
+ drbr_putback(ifp, br, next, snext);
wq->tx_stats.tx_stops ++;
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
status = drbr_enqueue(ifp, br, next);
}
break;
}
+ drbr_advance(ifp, br);
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
ETHER_BPF_MTAP(ifp, next);
- next = drbr_dequeue(ifp, br);
}
return status;
Index: dev/ixgbe/ixgbe.c
===================================================================
--- dev/ixgbe/ixgbe.c (revision 246357)
+++ dev/ixgbe/ixgbe.c (working copy)
@@ -821,7 +821,7 @@ static int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
struct adapter *adapter = txr->adapter;
- struct mbuf *next;
+ struct mbuf *next, *snext;
int enqueued, err = 0;
if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
@@ -832,22 +832,25 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx
}
enqueued = 0;
- if (m == NULL) {
- next = drbr_dequeue(ifp, txr->br);
- } else if (drbr_needs_enqueue(ifp, txr->br)) {
- if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
+ if (m != NULL) {
+ err = drbr_enqueue(ifp, txr->br, m);
+ if (err) {
return (err);
- next = drbr_dequeue(ifp, txr->br);
- } else
- next = m;
+ }
+ }
/* Process the queue */
- while (next != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
+ snext = next;
if ((err = ixgbe_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
+ if (next == NULL) {
+ drbr_advance(ifp, txr->br);
+ } else {
+ drbr_putback(ifp, txr->br, next, snext);
+ }
break;
}
+ drbr_advance(ifp, txr->br);
enqueued++;
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, next);
@@ -855,7 +858,6 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx
break;
if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
ixgbe_txeof(txr);
- next = drbr_dequeue(ifp, txr->br);
}
if (enqueued > 0) {
Index: dev/ixgbe/ixv.c
===================================================================
--- dev/ixgbe/ixv.c (revision 246357)
+++ dev/ixgbe/ixv.c (working copy)
@@ -605,7 +605,7 @@ static int
ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
struct adapter *adapter = txr->adapter;
- struct mbuf *next;
+ struct mbuf *next, *snext;
int enqueued, err = 0;
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
@@ -620,22 +620,24 @@ ixv_mq_start_locked(struct ifnet *ifp, struct tx_r
ixv_txeof(txr);
enqueued = 0;
- if (m == NULL) {
- next = drbr_dequeue(ifp, txr->br);
- } else if (drbr_needs_enqueue(ifp, txr->br)) {
- if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
+ if (m != NULL) {
+ err = drbr_enqueue(ifp, txr->br, m);
+ if (err) {
return (err);
- next = drbr_dequeue(ifp, txr->br);
- } else
- next = m;
-
+ }
+ }
/* Process the queue */
- while (next != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
+ snext = next;
if ((err = ixv_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
+ if (next == NULL) {
+ drbr_advance(ifp, txr->br);
+ } else {
+ drbr_putback(ifp, txr->br, next, snext);
+ }
break;
}
+ drbr_advance(ifp, txr->br);
enqueued++;
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
@@ -648,7 +650,6 @@ ixv_mq_start_locked(struct ifnet *ifp, struct tx_r
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
break;
}
- next = drbr_dequeue(ifp, txr->br);
}
if (enqueued > 0) {
Index: dev/bxe/if_bxe.c
===================================================================
--- dev/bxe/if_bxe.c (revision 246357)
+++ dev/bxe/if_bxe.c (working copy)
@@ -9491,7 +9491,7 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
struct bxe_fastpath *fp, struct mbuf *m)
{
struct bxe_softc *sc;
- struct mbuf *next;
+ struct mbuf *next, *snext;
int depth, rc, tx_count;
sc = fp->sc;
@@ -9506,24 +9506,16 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
BXE_FP_LOCK_ASSERT(fp);
- if (m == NULL) {
- /* No new work, check for pending frames. */
- next = drbr_dequeue(ifp, fp->br);
- } else if (drbr_needs_enqueue(ifp, fp->br)) {
- /* Both new and pending work, maintain packet order. */
+ if (m != NULL) {
rc = drbr_enqueue(ifp, fp->br, m);
if (rc != 0) {
fp->tx_soft_errors++;
goto bxe_tx_mq_start_locked_exit;
}
- next = drbr_dequeue(ifp, fp->br);
- } else
- /* New work only, nothing pending. */
- next = m;
-
+ }
/* Keep adding entries while there are frames to send. */
- while (next != NULL) {
-
+ while ((next = drbr_peek(ifp, fp->br)) != NULL) {
+ snext = next;
/* The transmit mbuf now belongs to us, keep track of it. */
fp->tx_mbuf_alloc++;
@@ -9537,23 +9529,22 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
if (__predict_false(rc != 0)) {
fp->tx_encap_failures++;
/* Very Bad Frames(tm) may have been dropped. */
- if (next != NULL) {
+ if (next == NULL) {
+ drbr_advance(ifp, fp->br);
+ } else {
+ drbr_putback(ifp, fp->br, next, snext);
/*
* Mark the TX queue as full and save
* the frame.
*/
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
fp->tx_frame_deferred++;
-
- /* This may reorder frame. */
- rc = drbr_enqueue(ifp, fp->br, next);
fp->tx_mbuf_alloc--;
}
-
/* Stop looking for more work. */
break;
}
-
+ drbr_advance(ifp, fp->br);
/* The transmit frame was enqueued successfully. */
tx_count++;
@@ -9574,8 +9565,6 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
break;
}
-
- next = drbr_dequeue(ifp, fp->br);
}
/* No TX packets were dequeued. */
Index: net/if_var.h
===================================================================
--- net/if_var.h (revision 246357)
+++ net/if_var.h (working copy)
@@ -622,6 +622,46 @@ drbr_enqueue(struct ifnet *ifp, struct buf_ring *b
}
static __inline void
+drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new, struct mbuf *prev)
+{
+ /*
+ * The top of the list needs to be swapped
+ * for this one.
+ */
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Peek in altq case dequeued it
+ * so put it back.
+ */
+ IFQ_DRV_PREPEND(&ifp->if_snd, new);
+ return;
+ }
+#endif
+ if (new != prev)
+ buf_ring_swap(br, new, prev);
+}
+
+static __inline struct mbuf *
+drbr_peek(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Pull it off like a dequeue
+ * since drbr_advance() does nothing
+ * for altq and drbr_putback() will
+ * use the old prepend function.
+ */
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return(buf_ring_peek(br));
+}
+
+static __inline void
drbr_flush(struct ifnet *ifp, struct buf_ring *br)
{
struct mbuf *m;
@@ -648,7 +688,7 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *b
#ifdef ALTQ
struct mbuf *m;
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
IFQ_DEQUEUE(&ifp->if_snd, m);
return (m);
}
@@ -656,6 +696,18 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *b
return (buf_ring_dequeue_sc(br));
}
+static __inline void
+drbr_advance(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ /* Nothing to do here since peek dequeues in altq case */
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ return;
+#endif
+ return (buf_ring_advance_sc(br));
+}
+
+
static __inline struct mbuf *
drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
int (*func) (struct mbuf *, void *), void *arg)
Index: sys/buf_ring.h
===================================================================
--- sys/buf_ring.h (revision 246357)
+++ sys/buf_ring.h (working copy)
@@ -208,6 +208,51 @@ buf_ring_dequeue_sc(struct buf_ring *br)
}
/*
+ * single-consumer advance after a peek
+ * use where it is protected by a lock
+ * e.g. a network driver's tx queue lock
+ */
+static __inline void
+buf_ring_advance_sc(struct buf_ring *br)
+{
+ uint32_t cons_head, cons_next;
+ uint32_t prod_tail;
+
+ cons_head = br->br_cons_head;
+ prod_tail = br->br_prod_tail;
+
+ cons_next = (cons_head + 1) & br->br_cons_mask;
+ if (cons_head == prod_tail)
+ return;
+ br->br_cons_head = cons_next;
+#ifdef DEBUG_BUFRING
+ br->br_ring[cons_head] = NULL;
+#endif
+ br->br_cons_tail = cons_next;
+}
+
+/*
+ * Used to return a differnt mbuf to the
+ * top of the ring. This can happen if
+ * the driver changed the packets (some defragmentation
+ * for example) and then realized the transmit
+ * ring was full. In such a case the old packet
+ * is now freed, but we want the order of the actual
+ * data (being sent in the new packet) to remain
+ * the same.
+ */
+static __inline void
+buf_ring_swap(struct buf_ring *br, void *new, void *old)
+{
+ int ret;
+ if (br->br_cons_head == br->br_prod_tail)
+ /* Huh? */
+ return;
+ ret = atomic_cmpset_long((uint64_t *)&br->br_ring[br->br_cons_head], (uint64_t)old, (uint64_t)new);
+ KASSERT(ret, ("Swap out failed old:%p new:%p ret:%d", old, new, ret));
+}
+
+/*
* return a pointer to the first entry in the ring
* without modifying it, or NULL if the ring is empty
* race-prone if not protected by a lock
Index: ofed/drivers/net/mlx4/en_tx.c
===================================================================
--- ofed/drivers/net/mlx4/en_tx.c (revision 246357)
+++ ofed/drivers/net/mlx4/en_tx.c (working copy)
@@ -919,7 +919,7 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_tx_ring *ring;
- struct mbuf *next;
+ struct mbuf *next, *snext;
int enqueued, err = 0;
ring = &priv->tx_ring[tx_ind];
@@ -931,22 +931,22 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_
}
enqueued = 0;
- if (m == NULL) {
- next = drbr_dequeue(dev, ring->br);
- } else if (drbr_needs_enqueue(dev, ring->br)) {
+ if (m != NULL) {
if ((err = drbr_enqueue(dev, ring->br, m)) != 0)
return (err);
- next = drbr_dequeue(dev, ring->br);
- } else
- next = m;
-
+ }
/* Process the queue */
- while (next != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
+ snext = next;
if ((err = mlx4_en_xmit(dev, tx_ind, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(dev, ring->br, next);
+ if (next == NULL) {
+ drbr_advance(ifp, txr->br);
+ } else {
+ drbr_putback(ifp, txr->br, next, snext);
+ }
break;
}
+ drbr_advance(ifp, txr->br);
enqueued++;
dev->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
@@ -955,7 +955,6 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_
ETHER_BPF_MTAP(dev, next);
if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- next = drbr_dequeue(dev, ring->br);
}
if (enqueued > 0)
[-- Attachment #3 --]
On Feb 5, 2013, at 12:13 PM, John Baldwin wrote:
> On Tuesday, February 05, 2013 10:24:24 am Randall Stewart wrote:
>> Here is an updated patch… sigh.. I foobar'd the ALTQ stuff.. lots of crashes
> ;-D
>
> Heh, I like this better, thanks. I think you can remove buf_ring_swap() as it
> is no longer used?
>
> --
> John Baldwin
> _______________________________________________
> freebsd-net@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to "freebsd-net-unsubscribe@freebsd.org"
>
------------------------------
Randall Stewart
803-317-4952 (cell)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?0D421326-9A80-4E21-A18E-E717F5C02164>
