Date: Tue, 5 Feb 2013 14:30:36 -0500 From: Randall Stewart <rrs@lakerest.net> To: John Baldwin <jhb@freebsd.org> Cc: freebsd-net@freebsd.org, Robert Watson <rwatson@freebsd.org>, Jack Vogel <jfvogel@gmail.com>, Kip Macy <kmacy@freebsd.org> Subject: Re: Driver patch to look at... Message-ID: <74624C61-0564-420F-9F82-0475488F857C@lakerest.net> In-Reply-To: <201302051411.52495.jhb@freebsd.org> References: <D3AA078A-CD19-4228-A019-BE9C985895E2@lakerest.net> <990BD290-643B-4BC7-8D64-6D4CE987025A@lakerest.net> <00075DDD-73A1-4CA4-9574-036D43B071D9@lakerest.net> <201302051411.52495.jhb@freebsd.org>
index | next in thread | previous in thread | raw e-mail
[-- Attachment #1 --]
Ok
Here it is one last time (I hope) with the updates ;-)
R
[-- Attachment #2 --]
Index: dev/e1000/if_em.c
===================================================================
--- dev/e1000/if_em.c (revision 246357)
+++ dev/e1000/if_em.c (working copy)
@@ -905,22 +905,24 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ri
}
enq = 0;
- if (m == NULL) {
- next = drbr_dequeue(ifp, txr->br);
- } else if (drbr_needs_enqueue(ifp, txr->br)) {
- if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
+ if (m != NULL) {
+ err = drbr_enqueue(ifp, txr->br, m);
+ if (err) {
return (err);
- next = drbr_dequeue(ifp, txr->br);
- } else
- next = m;
+ }
+ }
/* Process the queue */
- while (next != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = em_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
- break;
+ if (next == NULL) {
+ drbr_advance(ifp, txr->br);
+ } else {
+ drbr_putback(ifp, txr->br, next);
+ }
+ break;
}
+ drbr_advance(ifp, txr->br);
enq++;
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
@@ -928,7 +930,6 @@ em_mq_start_locked(struct ifnet *ifp, struct tx_ri
ETHER_BPF_MTAP(ifp, next);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- next = drbr_dequeue(ifp, txr->br);
}
if (enq > 0) {
Index: dev/e1000/if_igb.c
===================================================================
--- dev/e1000/if_igb.c (revision 246357)
+++ dev/e1000/if_igb.c (working copy)
@@ -965,12 +965,13 @@ igb_mq_start(struct ifnet *ifp, struct mbuf *m)
** out-of-order delivery, but
** settle for it if that fails
*/
- if (m)
+ if (m != NULL)
drbr_enqueue(ifp, txr->br, m);
err = igb_mq_start_locked(ifp, txr);
IGB_TX_UNLOCK(txr);
} else {
- err = drbr_enqueue(ifp, txr->br, m);
+ if (m != NULL)
+ err = drbr_enqueue(ifp, txr->br, m);
taskqueue_enqueue(que->tq, &txr->txq_task);
}
@@ -994,12 +995,22 @@ igb_mq_start_locked(struct ifnet *ifp, struct tx_r
enq = 0;
/* Process the queue */
- while ((next = drbr_dequeue(ifp, txr->br)) != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = igb_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
+ if (next == NULL) {
+ /* It was freed, move forward */
+ drbr_advance(ifp, txr->br);
+ } else {
+ /*
+ * Still have one left, it may not be
+ * the same since the transmit function
+ * may have changed it.
+ */
+ drbr_putback(ifp, txr->br, next);
+ }
break;
}
+ drbr_advance(ifp, txr->br);
enq++;
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
Index: dev/oce/oce_if.c
===================================================================
--- dev/oce/oce_if.c (revision 246357)
+++ dev/oce/oce_if.c (working copy)
@@ -1166,29 +1166,27 @@ oce_multiq_transmit(struct ifnet *ifp, struct mbuf
return status;
}
- if (m == NULL)
- next = drbr_dequeue(ifp, br);
- else if (drbr_needs_enqueue(ifp, br)) {
+ if (m != NULL) {
if ((status = drbr_enqueue(ifp, br, m)) != 0)
return status;
- next = drbr_dequeue(ifp, br);
- } else
- next = m;
-
- while (next != NULL) {
+ }
+ while ((next = drbr_peek(ifp, br)) != NULL) {
if (oce_tx(sc, &next, queue_index)) {
- if (next != NULL) {
+ if (next == NULL) {
+ drbr_advance(ifp, br);
+ } else {
+ drbr_putback(ifp, br, next);
wq->tx_stats.tx_stops ++;
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
status = drbr_enqueue(ifp, br, next);
}
break;
}
+ drbr_advance(ifp, br);
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
ifp->if_omcasts++;
ETHER_BPF_MTAP(ifp, next);
- next = drbr_dequeue(ifp, br);
}
return status;
Index: dev/ixgbe/ixgbe.c
===================================================================
--- dev/ixgbe/ixgbe.c (revision 246357)
+++ dev/ixgbe/ixgbe.c (working copy)
@@ -832,22 +832,24 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx
}
enqueued = 0;
- if (m == NULL) {
- next = drbr_dequeue(ifp, txr->br);
- } else if (drbr_needs_enqueue(ifp, txr->br)) {
- if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
+ if (m != NULL) {
+ err = drbr_enqueue(ifp, txr->br, m);
+ if (err) {
return (err);
- next = drbr_dequeue(ifp, txr->br);
- } else
- next = m;
+ }
+ }
/* Process the queue */
- while (next != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = ixgbe_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
+ if (next == NULL) {
+ drbr_advance(ifp, txr->br);
+ } else {
+ drbr_putback(ifp, txr->br, next);
+ }
break;
}
+ drbr_advance(ifp, txr->br);
enqueued++;
/* Send a copy of the frame to the BPF listener */
ETHER_BPF_MTAP(ifp, next);
@@ -855,7 +857,6 @@ ixgbe_mq_start_locked(struct ifnet *ifp, struct tx
break;
if (txr->tx_avail < IXGBE_TX_OP_THRESHOLD)
ixgbe_txeof(txr);
- next = drbr_dequeue(ifp, txr->br);
}
if (enqueued > 0) {
Index: dev/ixgbe/ixv.c
===================================================================
--- dev/ixgbe/ixv.c (revision 246357)
+++ dev/ixgbe/ixv.c (working copy)
@@ -620,22 +620,23 @@ ixv_mq_start_locked(struct ifnet *ifp, struct tx_r
ixv_txeof(txr);
enqueued = 0;
- if (m == NULL) {
- next = drbr_dequeue(ifp, txr->br);
- } else if (drbr_needs_enqueue(ifp, txr->br)) {
- if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
+ if (m != NULL) {
+ err = drbr_enqueue(ifp, txr->br, m);
+ if (err) {
return (err);
- next = drbr_dequeue(ifp, txr->br);
- } else
- next = m;
-
+ }
+ }
/* Process the queue */
- while (next != NULL) {
+ while ((next = drbr_peek(ifp, txr->br)) != NULL) {
if ((err = ixv_xmit(txr, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(ifp, txr->br, next);
+ if (next == NULL) {
+ drbr_advance(ifp, txr->br);
+ } else {
+ drbr_putback(ifp, txr->br, next);
+ }
break;
}
+ drbr_advance(ifp, txr->br);
enqueued++;
ifp->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
@@ -648,7 +649,6 @@ ixv_mq_start_locked(struct ifnet *ifp, struct tx_r
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
break;
}
- next = drbr_dequeue(ifp, txr->br);
}
if (enqueued > 0) {
Index: dev/bxe/if_bxe.c
===================================================================
--- dev/bxe/if_bxe.c (revision 246357)
+++ dev/bxe/if_bxe.c (working copy)
@@ -9506,24 +9506,15 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
BXE_FP_LOCK_ASSERT(fp);
- if (m == NULL) {
- /* No new work, check for pending frames. */
- next = drbr_dequeue(ifp, fp->br);
- } else if (drbr_needs_enqueue(ifp, fp->br)) {
- /* Both new and pending work, maintain packet order. */
+ if (m != NULL) {
rc = drbr_enqueue(ifp, fp->br, m);
if (rc != 0) {
fp->tx_soft_errors++;
goto bxe_tx_mq_start_locked_exit;
}
- next = drbr_dequeue(ifp, fp->br);
- } else
- /* New work only, nothing pending. */
- next = m;
-
+ }
/* Keep adding entries while there are frames to send. */
- while (next != NULL) {
-
+ while ((next = drbr_peek(ifp, fp->br)) != NULL) {
/* The transmit mbuf now belongs to us, keep track of it. */
fp->tx_mbuf_alloc++;
@@ -9537,23 +9528,22 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
if (__predict_false(rc != 0)) {
fp->tx_encap_failures++;
/* Very Bad Frames(tm) may have been dropped. */
- if (next != NULL) {
+ if (next == NULL) {
+ drbr_advance(ifp, fp->br);
+ } else {
+ drbr_putback(ifp, fp->br, next);
/*
* Mark the TX queue as full and save
* the frame.
*/
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
fp->tx_frame_deferred++;
-
- /* This may reorder frame. */
- rc = drbr_enqueue(ifp, fp->br, next);
fp->tx_mbuf_alloc--;
}
-
/* Stop looking for more work. */
break;
}
-
+ drbr_advance(ifp, fp->br);
/* The transmit frame was enqueued successfully. */
tx_count++;
@@ -9574,8 +9564,6 @@ bxe_tx_mq_start_locked(struct ifnet *ifp,
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
break;
}
-
- next = drbr_dequeue(ifp, fp->br);
}
/* No TX packets were dequeued. */
Index: net/if_var.h
===================================================================
--- net/if_var.h (revision 246357)
+++ net/if_var.h (working copy)
@@ -622,6 +622,45 @@ drbr_enqueue(struct ifnet *ifp, struct buf_ring *b
}
static __inline void
+drbr_putback(struct ifnet *ifp, struct buf_ring *br, struct mbuf *new)
+{
+ /*
+ * The top of the list needs to be swapped
+ * for this one.
+ */
+#ifdef ALTQ
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Peek in altq case dequeued it
+ * so put it back.
+ */
+ IFQ_DRV_PREPEND(&ifp->if_snd, new);
+ return;
+ }
+#endif
+ buf_ring_putback_sc(br, new);
+}
+
+static __inline struct mbuf *
+drbr_peek(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ struct mbuf *m;
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ /*
+ * Pull it off like a dequeue
+ * since drbr_advance() does nothing
+ * for altq and drbr_putback() will
+ * use the old prepend function.
+ */
+ IFQ_DEQUEUE(&ifp->if_snd, m);
+ return (m);
+ }
+#endif
+ return(buf_ring_peek(br));
+}
+
+static __inline void
drbr_flush(struct ifnet *ifp, struct buf_ring *br)
{
struct mbuf *m;
@@ -648,7 +687,7 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *b
#ifdef ALTQ
struct mbuf *m;
- if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
IFQ_DEQUEUE(&ifp->if_snd, m);
return (m);
}
@@ -656,6 +695,18 @@ drbr_dequeue(struct ifnet *ifp, struct buf_ring *b
return (buf_ring_dequeue_sc(br));
}
+static __inline void
+drbr_advance(struct ifnet *ifp, struct buf_ring *br)
+{
+#ifdef ALTQ
+ /* Nothing to do here since peek dequeues in altq case */
+ if (ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd))
+ return;
+#endif
+ return (buf_ring_advance_sc(br));
+}
+
+
static __inline struct mbuf *
drbr_dequeue_cond(struct ifnet *ifp, struct buf_ring *br,
int (*func) (struct mbuf *, void *), void *arg)
Index: sys/buf_ring.h
===================================================================
--- sys/buf_ring.h (revision 246357)
+++ sys/buf_ring.h (working copy)
@@ -208,6 +208,55 @@ buf_ring_dequeue_sc(struct buf_ring *br)
}
/*
+ * single-consumer advance after a peek
+ * use where it is protected by a lock
+ * e.g. a network driver's tx queue lock
+ */
+static __inline void
+buf_ring_advance_sc(struct buf_ring *br)
+{
+ uint32_t cons_head, cons_next;
+ uint32_t prod_tail;
+
+ cons_head = br->br_cons_head;
+ prod_tail = br->br_prod_tail;
+
+ cons_next = (cons_head + 1) & br->br_cons_mask;
+ if (cons_head == prod_tail)
+ return;
+ br->br_cons_head = cons_next;
+#ifdef DEBUG_BUFRING
+ br->br_ring[cons_head] = NULL;
+#endif
+ br->br_cons_tail = cons_next;
+}
+
+/*
+ * Used to return a buffer (most likely already there)
+ * to the top od the ring. The caller should *not*
+ * have used any dequeue to pull it out of the ring
+ * but instead should have used the peek() function.
+ * This is normally used where the transmit queue
+ * of a driver is full, and an mubf must be returned.
+ * Most likely whats in the ring-buffer is what
+ * is being put back (since it was not removed), but
+ * sometimes the lower transmit function may have
+ * done a pullup or other function that will have
+ * changed it. As an optimzation we always put it
+ * back (since jhb says the store is probably cheaper),
+ * if we have to do a multi-queue version we will need
+ * the compare and an atomic.
+ */
+static __inline void
+buf_ring_putback_sc(struct buf_ring *br, void *new)
+{
+ if (br->br_cons_head == br->br_prod_tail)
+ /* Huh? */
+ return;
+ br->br_ring[br->br_cons_head] = new;
+}
+
+/*
* return a pointer to the first entry in the ring
* without modifying it, or NULL if the ring is empty
* race-prone if not protected by a lock
Index: ofed/drivers/net/mlx4/en_tx.c
===================================================================
--- ofed/drivers/net/mlx4/en_tx.c (revision 246357)
+++ ofed/drivers/net/mlx4/en_tx.c (working copy)
@@ -931,22 +931,21 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_
}
enqueued = 0;
- if (m == NULL) {
- next = drbr_dequeue(dev, ring->br);
- } else if (drbr_needs_enqueue(dev, ring->br)) {
+ if (m != NULL) {
if ((err = drbr_enqueue(dev, ring->br, m)) != 0)
return (err);
- next = drbr_dequeue(dev, ring->br);
- } else
- next = m;
-
+ }
/* Process the queue */
- while (next != NULL) {
+ while ((next = drbr_peek(ifp, ring->br)) != NULL) {
if ((err = mlx4_en_xmit(dev, tx_ind, &next)) != 0) {
- if (next != NULL)
- err = drbr_enqueue(dev, ring->br, next);
+ if (next == NULL) {
+ drbr_advance(ifp, ring->br);
+ } else {
+ drbr_putback(ifp, ring->br, next);
+ }
break;
}
+ drbr_advance(ifp, ring->br);
enqueued++;
dev->if_obytes += next->m_pkthdr.len;
if (next->m_flags & M_MCAST)
@@ -955,7 +954,6 @@ mlx4_en_transmit_locked(struct ifnet *dev, int tx_
ETHER_BPF_MTAP(dev, next);
if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
break;
- next = drbr_dequeue(dev, ring->br);
}
if (enqueued > 0)
[-- Attachment #3 --]
On Feb 5, 2013, at 2:11 PM, John Baldwin wrote:
> On Tuesday, February 05, 2013 2:08:15 pm Randall Stewart wrote:
>> Hmm
>>
>> wait, I could probably do the compare to whats in the ring buffer ;-D
>
> I wouldn't bother. The compare and branch is probably more expensive than
> the store.
>
>> R
>> On Feb 5, 2013, at 2:04 PM, Randall Stewart wrote:
>>
>>> Hmm
>>>
>>> That would trade off a stack pointer + a compare
>>> vs always doing the move.
>>>
>>> Thats fine until I have to add the _mc() version, then the put
>>> back would be an atomic, and most of the time the return from
>>> this is probably not changed…
>>>
>>> I really would prefer not to since the compare and maybe store vs
>>> the always store.. though the same now, would be far more expensive
>>> in the _mc version.. if we do a _mc version of course ;-)
>>>
>>> But I am willing to do whatever .. since this really needs to be fixed.
>>>
>>> R
>>> On Feb 5, 2013, at 1:52 PM, John Baldwin wrote:
>>>
>>>> On Tuesday, February 05, 2013 12:44:01 pm Randall Stewart wrote:
>>>>> Actually, no it is used.
>>>>>
>>>>> If you look in if_var.h int he drbr_putback() function, it does
>>>>> a buf_ring_swap when the old mbuf pointer does not equal the
>>>>> new mbuf pointer. This *does* happen, I crashed at least once
>>>>> yesterday when the igb driver did something to free the original
>>>>> mbuf and return a new mbuf with the data (prepend or some such).
>>>>>
>>>>> I also have found several issues that I have fixed this morning.. its been
>>>>> crash city on my test beds..
>>>>>
>>>>> Here is the latest patch with all fixes and suggested changes from emaste
>>>> (thanks Ed)
>>>>
>>>> Actually, one more suggestion then (since you have to keep putback). It
>>>> would be nice to not have to require 'snext' in all the callers. How
>>>> about replace buf_ring_swap() with a buf_ring_putback_sc() that accepts the
>>>> mbuf and just stores it at the head unconditionally and have drbr_putback()
>>>> use that?
>>>>
>>>> --
>>>> John Baldwin
>>>> _______________________________________________
>>>> freebsd-net@freebsd.org mailing list
>>>> http://lists.freebsd.org/mailman/listinfo/freebsd-net
>>>> To unsubscribe, send any mail to "freebsd-net-unsubscribe@freebsd.org"
>>>>
>>>
>>> ------------------------------
>>> Randall Stewart
>>> 803-317-4952 (cell)
>>>
>>> _______________________________________________
>>> freebsd-net@freebsd.org mailing list
>>> http://lists.freebsd.org/mailman/listinfo/freebsd-net
>>> To unsubscribe, send any mail to "freebsd-net-unsubscribe@freebsd.org"
>>>
>>
>> ------------------------------
>> Randall Stewart
>> 803-317-4952 (cell)
>>
>>
>
> --
> John Baldwin
> _______________________________________________
> freebsd-net@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to "freebsd-net-unsubscribe@freebsd.org"
>
------------------------------
Randall Stewart
803-317-4952 (cell)
help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?74624C61-0564-420F-9F82-0475488F857C>
