Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 8 Sep 2011 15:04:35 -0400
From:      Arnaud Lacombe <lacombar@gmail.com>
To:        Sean Bruno <seanbru@yahoo-inc.com>
Cc:        "freebsd-net@freebsd.org" <freebsd-net@freebsd.org>, Jack Vogel <jfvogel@gmail.com>
Subject:   Re: FreeBSD 7-STABLE mbuf corruption
Message-ID:  <CACqU3MXLm83AxNkfYkhaJzjE1_iJgw-kgAC=cvNsqxL2HQyGqg@mail.gmail.com>
In-Reply-To: <1315441138.2872.13.camel@hitfishpass-lx.corp.yahoo.com>
References:  <CACqU3MUs9Z9GeuGe=8iVp=MWV6eG-tO%2BkHb1znatsTq2uEqwvA@mail.gmail.com> <CACqU3MXf52tLajTfVCEiGGhtCuXsesrdM65LfsoGecuZj2tNwA@mail.gmail.com> <1315441138.2872.13.camel@hitfishpass-lx.corp.yahoo.com>

next in thread | previous in thread | raw e-mail | index | archive | help
Hi,

On Wed, Sep 7, 2011 at 8:18 PM, Sean Bruno <seanbru@yahoo-inc.com> wrote:
> [...]
> In addition to some txeof() handling patches ... Jack can see if this is
> still relevant in the freebsd7 universe. Note that Yahoo is "special"
> and we pluck and chuck code/drivers/whatever at will so I don't know how
> this code will apply to stable-7 or if it has been applied already.
>
> Note that this code "rage-quits" the EIAM/EIAC auto ack strategy and
> tries to handle the OACTIVE state handling better. =A0I hope these clues
> find you well.
>
Just for the record, I'll comment inline.

> --- //depot/yahoo/ybsd_7/src/sys/dev/e1000/if_igb.c =A0 =A0 2010-11-29
> 20:47:23.000000000 0000
> +++ /home/seanbru/ybsd_7/src/sys/dev/e1000/if_igb.c =A0 =A0 2010-11-29
> 20:47:23.000000000 0000
> @@ -30,7 +30,7 @@
> =A0 POSSIBILITY OF SUCH DAMAGE.
>
>
> *************************************************************************=
*****/
> -/*$FreeBSD: src/sys/dev/e1000/if_igb.c,v 1.3.2.13 2010/11/27 01:09:54
> jfv Exp $*/
> +/*$FreeBSD: stable/7/sys/dev/e1000/if_igb.c 216465 2010-12-15 22:48:44Z
> jfv $*/
>
>
> =A0#ifdef HAVE_KERNEL_OPTION_HEADERS
> @@ -318,10 +318,6 @@
> =A0static bool igb_header_split =3D FALSE;
> =A0TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
>
> -/*
> -** This will autoconfigure based on
> -** the number of CPUs if left at 0.
> -*/
> =A0static int igb_num_queues =3D 0;
> =A0TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
>
> @@ -784,10 +780,14 @@
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return;
>
> =A0 =A0 =A0 =A0/* Call cleanup if number of TX descriptors low */
> +#if 0
> =A0 =A0 =A0 =A0if (txr->tx_avail <=3D IGB_TX_CLEANUP_THRESHOLD)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0igb_txeof(txr);
> +#endif
>
> =A0 =A0 =A0 =A0while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (txr->tx_avail <=3D IGB_TX_CLEANUP_THRES=
HOLD)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 igb_txeof(txr);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (txr->tx_avail <=3D IGB_TX_OP_THRESHOLD=
) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ifp->if_drv_flags |=3D IFF=
_DRV_OACTIVE;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break;

> @@ -1162,10 +1162,10 @@
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0IGB_TX_LOCK(txr);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (igb_txeof(txr))
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0more =3D TRUE;
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 igb_start_locked(txr, ifp);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 /*if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) Poin=
tless as
> igb_start_locked() checks this right off the bat*/
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 igb_start_locked(txr, ifp);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0IGB_TX_UNLOCK(txr);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (more) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (more || (ifp->if_drv_flags & IFF_DRV_OA=
CTIVE)) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0taskqueue_enqueue(que->tq,=
 &que->que_task);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
that hunk seem already to be in the HEAD driver.

> @@ -1298,15 +1298,20 @@
> =A0 =A0 =A0 =A0struct rx_ring *rxr =3D que->rxr;
> =A0 =A0 =A0 =A0u32 =A0 =A0 =A0 =A0 =A0 =A0 newitr =3D 0;
> =A0 =A0 =A0 =A0bool =A0 =A0 =A0 =A0 =A0 =A0more_tx, more_rx;
> + =A0 =A0 =A0 struct ifnet =A0 =A0*ifp =3D adapter->ifp;
>
> +#if 0
> =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
> + =A0 =A0 =A0 newitr =3D E1000_READ_REG(&adapter->hw, E1000_EICR);
> +#endif
> + =A0 =A0 =A0 E1000_WRITE_REG(&adapter->hw, E1000_EICR, que->eims);
> =A0 =A0 =A0 =A0++que->irqs;
>
> =A0 =A0 =A0 =A0IGB_TX_LOCK(txr);
> =A0 =A0 =A0 =A0more_tx =3D igb_txeof(txr);
> =A0 =A0 =A0 =A0IGB_TX_UNLOCK(txr);
>
already in HEAD.

> - =A0 =A0 =A0 more_rx =3D igb_rxeof(que, adapter->rx_process_limit, NULL)=
;
> + =A0 =A0 =A0 more_rx =3D igb_rxeof(que, -1, NULL);
>
> =A0 =A0 =A0 =A0if (igb_enable_aim =3D=3D FALSE)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0goto no_calc;
applied manually.

> @@ -1361,7 +1366,7 @@
>
> =A0no_calc:
> =A0 =A0 =A0 =A0/* Schedule a clean task if needed*/
> - =A0 =A0 =A0 if (more_tx || more_rx)
> + =A0 =A0 =A0 if (more_tx || more_rx || (ifp->if_drv_flags & IFF_DRV_OACT=
IVE))
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0taskqueue_enqueue(que->tq, &que->que_task)=
;
> =A0 =A0 =A0 =A0else
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* Reenable this interrupt */
already in HEAD.

> @@ -1382,6 +1387,7 @@
> =A0 =A0 =A0 =A0struct adapter =A0*adapter =3D arg;
> =A0 =A0 =A0 =A0u32 =A0 =A0 =A0 =A0 =A0 =A0 icr;
>
> + =A0 =A0 =A0 E1000_WRITE_REG(&adapter->hw, E1000_EICR, adapter->link_mas=
k);
> =A0 =A0 =A0 =A0++adapter->link_irq;
> =A0 =A0 =A0 =A0icr =3D E1000_READ_REG(&adapter->hw, E1000_ICR);
> =A0 =A0 =A0 =A0if (!(icr & E1000_ICR_LSC))
applied manually.

> @@ -1535,6 +1541,14 @@
> =A0 =A0 =A0 =A0if (m_head->m_flags & M_VLANTAG)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0cmd_type_len |=3D E1000_ADVTXD_DCMD_VLE;
>
> +/*
> + * We just did this in before invocation, seems completely
> + * redundant, igb_handle_queue -> igb_txeof
> + * Pretty sure this is impossible as we check for the
> + * IGB_TX_CLEANUP_THRESHOLD in igb_start_locked() which happens
> + * before this func in invoked
> + */
> +#if 0
> =A0 =A0 =A0 =A0 /*
> =A0 =A0 =A0 =A0 =A0* Force a cleanup if number of TX descriptors
> =A0 =A0 =A0 =A0 =A0* available hits the threshold
> @@ -1547,6 +1561,7 @@
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (ENOBUFS);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
> =A0 =A0 =A0 =A0}
> +#endif
>
> =A0 =A0 =A0 =A0/*
> =A0 =A0 =A0 =A0 =A0* Map the packet for DMA.
already in HEAD.

> @@ -2082,6 +2097,7 @@
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0que->eims =3D E1000_EICR_T=
X_QUEUE0 << i;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0else
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0que->eims =3D 1 << vector;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 device_printf(adapter->dev, "que %d eims=3D=
 0x%x\n", i,
> que->eims);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/*
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0** Bind the msix vector, and thus the
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0** rings to the corresponding cpu.
applied manually

> @@ -3264,8 +3280,6 @@
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0case ETHERTYPE_IPV6:
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ip6 =3D (struct ip6_hdr *)=
(mp->m_data + ehdrlen);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ip_hlen =3D sizeof(struct =
ip6_hdr);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (mp->m_len < ehdrlen + i=
p_hlen)
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return (FAL=
SE);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ipproto =3D ip6->ip6_nxt;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0type_tucmd_mlhl |=3D E1000=
_ADVTXD_TUCMD_IPV6;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break;
already in HEAD.

> @@ -4461,6 +4475,24 @@
> =A0static void
> =A0igb_enable_intr(struct adapter *adapter)
> =A0{
> + =A0 =A0 =A0 uint32_t ims, regval;
> +
> + =A0 =A0 =A0 =A0if (adapter->msix_mem) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ims =3D E1000_IMS_LSC | E1000_IMS_DOUTSY=
NC;
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0regval =3D E1000_READ_REG(&adapter->hw, =
E1000_EIAC);
> +#if 0
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAC=
, regval |
> adapter->eims_mask);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0regval =3D E1000_READ_REG(&adapter->hw, =
E1000_EIAM);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAM=
, regval |
> adapter->eims_mask);
> +#endif
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIMS=
,
> adapter->eims_mask);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMS,=
 ims);
> + =A0 =A0 =A0 =A0} else {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMS,
> IMS_ENABLE_MASK | E1000_IMS_DRSTA);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IAM,
> IMS_ENABLE_MASK | E1000_IMS_DRSTA);
> + =A0 =A0 =A0 =A0}
> +
> +#if 0
> =A0 =A0 =A0 =A0/* With RSS set up what to auto clear */
> =A0 =A0 =A0 =A0if (adapter->msix_mem) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
> @@ -4475,6 +4507,7 @@
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMS,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0IMS_ENABLE_MASK);
> =A0 =A0 =A0 =A0}
> +#endif
> =A0 =A0 =A0 =A0E1000_WRITE_FLUSH(&adapter->hw);
>
> =A0 =A0 =A0 =A0return;
> @@ -4483,11 +4516,33 @@
> =A0static void
> =A0igb_disable_intr(struct adapter *adapter)
> =A0{
> + =A0 =A0 =A0 uint32_t regval;
> +
> + =A0 =A0 =A0 =A0/*
> + =A0 =A0 =A0 =A0 * we need to be careful when disabling interrupts. =A0T=
he VFs
> are also
> + =A0 =A0 =A0 =A0 * mapped into these registers and so clearing the bits =
can
> cause
> + =A0 =A0 =A0 =A0 * issues on the VF drivers so we only need to clear wha=
t we
> set
> + =A0 =A0 =A0 =A0 */
> + =A0 =A0 =A0 =A0if (adapter->msix_mem) {
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0regval =3D E1000_READ_REG(&adapter->hw, =
E1000_EIAM);
> +#if 0
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAM=
, regval &
> ~adapter->eims_mask);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0regval =3D E1000_READ_REG(&adapter->hw, =
E1000_EIAC);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAC=
, regval &
> ~adapter->eims_mask);
> +#endif
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIMC=
,
> adapter->eims_mask);
> + =A0 =A0 =A0 =A0}
> +
> + =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IAM, 0);
> + =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
> +
> +#if 0
> =A0 =A0 =A0 =A0if (adapter->msix_mem) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIMC, =
~0);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAC, =
0);
> =A0 =A0 =A0 =A0}
> =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
> +#endif
> =A0 =A0 =A0 =A0E1000_WRITE_FLUSH(&adapter->hw);
> =A0 =A0 =A0 =A0return;
> =A0}
applied manually.

> @@ -4876,8 +4931,8 @@
> =A0 =A0 =A0 =A0struct sysctl_oid_list *child =3D SYSCTL_CHILDREN(tree);
> =A0 =A0 =A0 =A0struct e1000_hw_stats *stats =3D adapter->stats;
>
> - =A0 =A0 =A0 struct sysctl_oid *stat_node, *queue_node, *int_node,
> *host_node;
> - =A0 =A0 =A0 struct sysctl_oid_list *stat_list, *queue_list, *int_list,
> *host_list;
> + =A0 =A0 =A0 struct sysctl_oid *stat_node, *queue_node, *int_node,
> *host_node, *debug_node;
> + =A0 =A0 =A0 struct sysctl_oid_list *stat_list, *queue_list, *int_list,
> *host_list, *debug_list;
>
> =A0#define QUEUE_NAME_LEN 32
> =A0 =A0 =A0 =A0char namebuf[QUEUE_NAME_LEN];
> @@ -5245,6 +5300,27 @@
> =A0 =A0 =A0 =A0SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_mi=
ssed",
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0CTLFLAG_RD, &stats->hrmpc,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0"Header Redirection Missed=
 Packet Count");
> +
> +
> + =A0 =A0 =A0 debug_node =3D SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "debug=
",
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CTL=
FLAG_RD, NULL,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "De=
bug info");
> +
> + =A0 =A0 =A0 debug_list =3D SYSCTL_CHILDREN(debug_node);
> +
> + =A0 =A0 =A0 txr =3D adapter->tx_rings;
> + =A0 =A0 =A0 SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "desc_avail",
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CTLFLAG_RD, (u_int *)(uintp=
tr_t)&txr->tx_avail,
> 0,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "");
> +
> + =A0 =A0 =A0 SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "next_to_clean",
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CTLFLAG_RD, &txr->next_to_c=
lean, 0,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "");
> +
> + =A0 =A0 =A0 SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "if_flags",
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CTLFLAG_RD, &adapter->ifp->=
if_drv_flags, 0,
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "");
> +
> =A0}
>
applied manually.

LYK,
 - Arnaud



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CACqU3MXLm83AxNkfYkhaJzjE1_iJgw-kgAC=cvNsqxL2HQyGqg>