From owner-freebsd-net@FreeBSD.ORG Thu Sep 8 19:04:36 2011 Return-Path: Delivered-To: freebsd-net@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 88EB6106566B for ; Thu, 8 Sep 2011 19:04:36 +0000 (UTC) (envelope-from lacombar@gmail.com) Received: from mail-gw0-f49.google.com (mail-gw0-f49.google.com [74.125.83.49]) by mx1.freebsd.org (Postfix) with ESMTP id 4675B8FC13 for ; Thu, 8 Sep 2011 19:04:36 +0000 (UTC) Received: by gwb1 with SMTP id 1so310909gwb.36 for ; Thu, 08 Sep 2011 12:04:35 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=mime-version:in-reply-to:references:date:message-id:subject:from:to :cc:content-type:content-transfer-encoding; bh=rK6DQ+EWJ+dH8IGH+9OLpZduknY425Ao0HIntqvYSu4=; b=i6GAh0rv3fXrEoak2in9A5t08G15hxYZX4BwxSfqrFayAISNHPcHL2K7ZeMXlsDebG zcyTDCPETezTIYl+t18CVDfn1wn6WGFWBCPFRQy96Hrv2CwK4NnvcYSY7/Gl1P9OQT29 nDqtsknA4Yb4idilQgf4zGVbnpyAIBIirubAY= MIME-Version: 1.0 Received: by 10.68.20.99 with SMTP id m3mr1641616pbe.444.1315508675059; Thu, 08 Sep 2011 12:04:35 -0700 (PDT) Received: by 10.142.232.15 with HTTP; Thu, 8 Sep 2011 12:04:35 -0700 (PDT) In-Reply-To: <1315441138.2872.13.camel@hitfishpass-lx.corp.yahoo.com> References: <1315441138.2872.13.camel@hitfishpass-lx.corp.yahoo.com> Date: Thu, 8 Sep 2011 15:04:35 -0400 Message-ID: From: Arnaud Lacombe To: Sean Bruno Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable Cc: "freebsd-net@freebsd.org" , Jack Vogel Subject: Re: FreeBSD 7-STABLE mbuf corruption X-BeenThere: freebsd-net@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Networking and TCP/IP with FreeBSD List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 08 Sep 2011 19:04:36 -0000 Hi, On Wed, Sep 7, 2011 at 8:18 PM, Sean Bruno wrote: > [...] > In addition to some txeof() handling patches ... Jack can see if this is > still relevant in the freebsd7 universe. Note that Yahoo is "special" > and we pluck and chuck code/drivers/whatever at will so I don't know how > this code will apply to stable-7 or if it has been applied already. > > Note that this code "rage-quits" the EIAM/EIAC auto ack strategy and > tries to handle the OACTIVE state handling better. =A0I hope these clues > find you well. > Just for the record, I'll comment inline. > --- //depot/yahoo/ybsd_7/src/sys/dev/e1000/if_igb.c =A0 =A0 2010-11-29 > 20:47:23.000000000 0000 > +++ /home/seanbru/ybsd_7/src/sys/dev/e1000/if_igb.c =A0 =A0 2010-11-29 > 20:47:23.000000000 0000 > @@ -30,7 +30,7 @@ > =A0 POSSIBILITY OF SUCH DAMAGE. > > > *************************************************************************= *****/ > -/*$FreeBSD: src/sys/dev/e1000/if_igb.c,v 1.3.2.13 2010/11/27 01:09:54 > jfv Exp $*/ > +/*$FreeBSD: stable/7/sys/dev/e1000/if_igb.c 216465 2010-12-15 22:48:44Z > jfv $*/ > > > =A0#ifdef HAVE_KERNEL_OPTION_HEADERS > @@ -318,10 +318,6 @@ > =A0static bool igb_header_split =3D FALSE; > =A0TUNABLE_INT("hw.igb.hdr_split", &igb_header_split); > > -/* > -** This will autoconfigure based on > -** the number of CPUs if left at 0. > -*/ > =A0static int igb_num_queues =3D 0; > =A0TUNABLE_INT("hw.igb.num_queues", &igb_num_queues); > > @@ -784,10 +780,14 @@ > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return; > > =A0 =A0 =A0 =A0/* Call cleanup if number of TX descriptors low */ > +#if 0 > =A0 =A0 =A0 =A0if (txr->tx_avail <=3D IGB_TX_CLEANUP_THRESHOLD) > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0igb_txeof(txr); > +#endif > > =A0 =A0 =A0 =A0while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (txr->tx_avail <=3D IGB_TX_CLEANUP_THRES= HOLD) > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 igb_txeof(txr); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (txr->tx_avail <=3D IGB_TX_OP_THRESHOLD= ) { > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ifp->if_drv_flags |=3D IFF= _DRV_OACTIVE; > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break; > @@ -1162,10 +1162,10 @@ > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0IGB_TX_LOCK(txr); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (igb_txeof(txr)) > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0more =3D TRUE; > - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) > - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 igb_start_locked(txr, ifp); > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 /*if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) Poin= tless as > igb_start_locked() checks this right off the bat*/ > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 igb_start_locked(txr, ifp); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0IGB_TX_UNLOCK(txr); > - =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (more) { > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (more || (ifp->if_drv_flags & IFF_DRV_OA= CTIVE)) { > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0taskqueue_enqueue(que->tq,= &que->que_task); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return; > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0} that hunk seem already to be in the HEAD driver. > @@ -1298,15 +1298,20 @@ > =A0 =A0 =A0 =A0struct rx_ring *rxr =3D que->rxr; > =A0 =A0 =A0 =A0u32 =A0 =A0 =A0 =A0 =A0 =A0 newitr =3D 0; > =A0 =A0 =A0 =A0bool =A0 =A0 =A0 =A0 =A0 =A0more_tx, more_rx; > + =A0 =A0 =A0 struct ifnet =A0 =A0*ifp =3D adapter->ifp; > > +#if 0 > =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims); > + =A0 =A0 =A0 newitr =3D E1000_READ_REG(&adapter->hw, E1000_EICR); > +#endif > + =A0 =A0 =A0 E1000_WRITE_REG(&adapter->hw, E1000_EICR, que->eims); > =A0 =A0 =A0 =A0++que->irqs; > > =A0 =A0 =A0 =A0IGB_TX_LOCK(txr); > =A0 =A0 =A0 =A0more_tx =3D igb_txeof(txr); > =A0 =A0 =A0 =A0IGB_TX_UNLOCK(txr); > already in HEAD. > - =A0 =A0 =A0 more_rx =3D igb_rxeof(que, adapter->rx_process_limit, NULL)= ; > + =A0 =A0 =A0 more_rx =3D igb_rxeof(que, -1, NULL); > > =A0 =A0 =A0 =A0if (igb_enable_aim =3D=3D FALSE) > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0goto no_calc; applied manually. > @@ -1361,7 +1366,7 @@ > > =A0no_calc: > =A0 =A0 =A0 =A0/* Schedule a clean task if needed*/ > - =A0 =A0 =A0 if (more_tx || more_rx) > + =A0 =A0 =A0 if (more_tx || more_rx || (ifp->if_drv_flags & IFF_DRV_OACT= IVE)) > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0taskqueue_enqueue(que->tq, &que->que_task)= ; > =A0 =A0 =A0 =A0else > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* Reenable this interrupt */ already in HEAD. > @@ -1382,6 +1387,7 @@ > =A0 =A0 =A0 =A0struct adapter =A0*adapter =3D arg; > =A0 =A0 =A0 =A0u32 =A0 =A0 =A0 =A0 =A0 =A0 icr; > > + =A0 =A0 =A0 E1000_WRITE_REG(&adapter->hw, E1000_EICR, adapter->link_mas= k); > =A0 =A0 =A0 =A0++adapter->link_irq; > =A0 =A0 =A0 =A0icr =3D E1000_READ_REG(&adapter->hw, E1000_ICR); > =A0 =A0 =A0 =A0if (!(icr & E1000_ICR_LSC)) applied manually. > @@ -1535,6 +1541,14 @@ > =A0 =A0 =A0 =A0if (m_head->m_flags & M_VLANTAG) > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0cmd_type_len |=3D E1000_ADVTXD_DCMD_VLE; > > +/* > + * We just did this in before invocation, seems completely > + * redundant, igb_handle_queue -> igb_txeof > + * Pretty sure this is impossible as we check for the > + * IGB_TX_CLEANUP_THRESHOLD in igb_start_locked() which happens > + * before this func in invoked > + */ > +#if 0 > =A0 =A0 =A0 =A0 /* > =A0 =A0 =A0 =A0 =A0* Force a cleanup if number of TX descriptors > =A0 =A0 =A0 =A0 =A0* available hits the threshold > @@ -1547,6 +1561,7 @@ > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (ENOBUFS); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0} > =A0 =A0 =A0 =A0} > +#endif > > =A0 =A0 =A0 =A0/* > =A0 =A0 =A0 =A0 =A0* Map the packet for DMA. already in HEAD. > @@ -2082,6 +2097,7 @@ > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0que->eims =3D E1000_EICR_T= X_QUEUE0 << i; > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0else > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0que->eims =3D 1 << vector; > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 device_printf(adapter->dev, "que %d eims=3D= 0x%x\n", i, > que->eims); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0** Bind the msix vector, and thus the > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0** rings to the corresponding cpu. applied manually > @@ -3264,8 +3280,6 @@ > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0case ETHERTYPE_IPV6: > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ip6 =3D (struct ip6_hdr *)= (mp->m_data + ehdrlen); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ip_hlen =3D sizeof(struct = ip6_hdr); > - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (mp->m_len < ehdrlen + i= p_hlen) > - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 return (FAL= SE); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ipproto =3D ip6->ip6_nxt; > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0type_tucmd_mlhl |=3D E1000= _ADVTXD_TUCMD_IPV6; > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break; already in HEAD. > @@ -4461,6 +4475,24 @@ > =A0static void > =A0igb_enable_intr(struct adapter *adapter) > =A0{ > + =A0 =A0 =A0 uint32_t ims, regval; > + > + =A0 =A0 =A0 =A0if (adapter->msix_mem) { > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ims =3D E1000_IMS_LSC | E1000_IMS_DOUTSY= NC; > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0regval =3D E1000_READ_REG(&adapter->hw, = E1000_EIAC); > +#if 0 > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAC= , regval | > adapter->eims_mask); > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0regval =3D E1000_READ_REG(&adapter->hw, = E1000_EIAM); > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAM= , regval | > adapter->eims_mask); > +#endif > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIMS= , > adapter->eims_mask); > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMS,= ims); > + =A0 =A0 =A0 =A0} else { > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMS, > IMS_ENABLE_MASK | E1000_IMS_DRSTA); > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IAM, > IMS_ENABLE_MASK | E1000_IMS_DRSTA); > + =A0 =A0 =A0 =A0} > + > +#if 0 > =A0 =A0 =A0 =A0/* With RSS set up what to auto clear */ > =A0 =A0 =A0 =A0if (adapter->msix_mem) { > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAC, > @@ -4475,6 +4507,7 @@ > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMS, > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0IMS_ENABLE_MASK); > =A0 =A0 =A0 =A0} > +#endif > =A0 =A0 =A0 =A0E1000_WRITE_FLUSH(&adapter->hw); > > =A0 =A0 =A0 =A0return; > @@ -4483,11 +4516,33 @@ > =A0static void > =A0igb_disable_intr(struct adapter *adapter) > =A0{ > + =A0 =A0 =A0 uint32_t regval; > + > + =A0 =A0 =A0 =A0/* > + =A0 =A0 =A0 =A0 * we need to be careful when disabling interrupts. =A0T= he VFs > are also > + =A0 =A0 =A0 =A0 * mapped into these registers and so clearing the bits = can > cause > + =A0 =A0 =A0 =A0 * issues on the VF drivers so we only need to clear wha= t we > set > + =A0 =A0 =A0 =A0 */ > + =A0 =A0 =A0 =A0if (adapter->msix_mem) { > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0regval =3D E1000_READ_REG(&adapter->hw, = E1000_EIAM); > +#if 0 > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAM= , regval & > ~adapter->eims_mask); > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0regval =3D E1000_READ_REG(&adapter->hw, = E1000_EIAC); > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAC= , regval & > ~adapter->eims_mask); > +#endif > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIMC= , > adapter->eims_mask); > + =A0 =A0 =A0 =A0} > + > + =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IAM, 0); > + =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); > + > +#if 0 > =A0 =A0 =A0 =A0if (adapter->msix_mem) { > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIMC, = ~0); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_EIAC, = 0); > =A0 =A0 =A0 =A0} > =A0 =A0 =A0 =A0E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); > +#endif > =A0 =A0 =A0 =A0E1000_WRITE_FLUSH(&adapter->hw); > =A0 =A0 =A0 =A0return; > =A0} applied manually. > @@ -4876,8 +4931,8 @@ > =A0 =A0 =A0 =A0struct sysctl_oid_list *child =3D SYSCTL_CHILDREN(tree); > =A0 =A0 =A0 =A0struct e1000_hw_stats *stats =3D adapter->stats; > > - =A0 =A0 =A0 struct sysctl_oid *stat_node, *queue_node, *int_node, > *host_node; > - =A0 =A0 =A0 struct sysctl_oid_list *stat_list, *queue_list, *int_list, > *host_list; > + =A0 =A0 =A0 struct sysctl_oid *stat_node, *queue_node, *int_node, > *host_node, *debug_node; > + =A0 =A0 =A0 struct sysctl_oid_list *stat_list, *queue_list, *int_list, > *host_list, *debug_list; > > =A0#define QUEUE_NAME_LEN 32 > =A0 =A0 =A0 =A0char namebuf[QUEUE_NAME_LEN]; > @@ -5245,6 +5300,27 @@ > =A0 =A0 =A0 =A0SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_mi= ssed", > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0CTLFLAG_RD, &stats->hrmpc, > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0"Header Redirection Missed= Packet Count"); > + > + > + =A0 =A0 =A0 debug_node =3D SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "debug= ", > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CTL= FLAG_RD, NULL, > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 "De= bug info"); > + > + =A0 =A0 =A0 debug_list =3D SYSCTL_CHILDREN(debug_node); > + > + =A0 =A0 =A0 txr =3D adapter->tx_rings; > + =A0 =A0 =A0 SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "desc_avail", > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CTLFLAG_RD, (u_int *)(uintp= tr_t)&txr->tx_avail, > 0, > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ""); > + > + =A0 =A0 =A0 SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "next_to_clean", > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CTLFLAG_RD, &txr->next_to_c= lean, 0, > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ""); > + > + =A0 =A0 =A0 SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "if_flags", > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CTLFLAG_RD, &adapter->ifp->= if_drv_flags, 0, > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 ""); > + > =A0} > applied manually. LYK, - Arnaud