Date: Sat, 13 Jul 2013 04:25:03 +0000 (UTC)
From: Adrian Chadd <adrian@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r253314 - head/sys/net
Message-ID: <201307130425.r6D4P31J078644@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: adrian Date: Sat Jul 13 04:25:03 2013 New Revision: 253314 URL: http://svnweb.freebsd.org/changeset/base/253314 Log: Bring over some link aggregation / LACP protocol improvements and debugging additions. * Add some new tracing events to aid in debugging. * Add in a debugging mode to drop transmit and received frames, specifically to test whether seeing or hearing heartbeats correctly cause LACP to drop the port. * Add in (and make default) a strict LACP mode, which requires the heartbeat on a port to be heard before it's used. Sometimes vendor ports will hang but the link layer stays up, resulting in hung traffic. * Add logging the number of link status flaps, again to aid in debugging badly behaving switch ports. * Calculate the lagg interface port speed as the multiple of the configured ports, rather than the largest. Obtained from: Netflix MFC after: 2 weeks Modified: head/sys/net/ieee8023ad_lacp.c head/sys/net/if_lagg.c head/sys/net/if_lagg.h Modified: head/sys/net/ieee8023ad_lacp.c ============================================================================== --- head/sys/net/ieee8023ad_lacp.c Sat Jul 13 01:24:16 2013 (r253313) +++ head/sys/net/ieee8023ad_lacp.c Sat Jul 13 04:25:03 2013 (r253314) @@ -188,29 +188,43 @@ static void lacp_dprintf(const struct la __attribute__((__format__(__printf__, 2, 3))); static int lacp_debug = 0; -SYSCTL_INT(_net, OID_AUTO, lacp_debug, CTLFLAG_RW | CTLFLAG_TUN, +SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad"); +SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN, &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)"); -TUNABLE_INT("net.lacp_debug", &lacp_debug); +TUNABLE_INT("net.link.lagg.lacp.debug", &lacp_debug); -#define LACP_DPRINTF(a) if (lacp_debug > 0) { lacp_dprintf a ; } -#define LACP_TRACE(a) if (lacp_debug > 1) { lacp_dprintf(a,"%s\n",__func__); } +/* bitmap of ports */ +static int lacp_rx_test = 0; +static int lacp_tx_test = 0; 
+SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, rxtest, CTLFLAG_RW, &lacp_rx_test, 0, + "RXTest"); +SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, txtest, CTLFLAG_RW, &lacp_tx_test, 0, + "TXTest"); + +static int lacp_strict = 1; +SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, strict, CTLFLAG_RW, &lacp_strict, + 0, "Strict spec compliance"); + +#define LACP_DPRINTF(a) if (lacp_debug & 0x01) { lacp_dprintf a ; } +#define LACP_TRACE(a) if (lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } +#define LACP_TPRINTF(a) if (lacp_debug & 0x04) { lacp_dprintf a ; } /* * partner administration variables. * XXX should be configurable. */ -static const struct lacp_peerinfo lacp_partner_admin = { +static const struct lacp_peerinfo lacp_partner_admin_optimistic = { .lip_systemid = { .lsi_prio = 0xffff }, .lip_portid = { .lpi_prio = 0xffff }, -#if 1 - /* optimistic */ .lip_state = LACP_STATE_SYNC | LACP_STATE_AGGREGATION | LACP_STATE_COLLECTING | LACP_STATE_DISTRIBUTING, -#else - /* pessimistic */ +}; + +static const struct lacp_peerinfo lacp_partner_admin_strict = { + .lip_systemid = { .lsi_prio = 0xffff }, + .lip_portid = { .lpi_prio = 0xffff }, .lip_state = 0, -#endif }; static const lacp_timer_func_t lacp_timer_funcs[LACP_NTIMER] = { @@ -301,6 +315,11 @@ lacp_pdu_input(struct lacp_port *lp, str lacp_dump_lacpdu(du); } + if ((1 << lp->lp_ifp->if_dunit) & lacp_rx_test) { + LACP_TPRINTF((lp, "Dropping RX PDU\n")); + goto bad; + } + LACP_LOCK(lsc); lacp_sm_rx(lp, du); LACP_UNLOCK(lsc); @@ -653,6 +672,7 @@ lacp_disable_distributing(struct lacp_po { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; + struct lagg_softc *sc = lsc->lsc_softc; char buf[LACP_LAGIDSTR_MAX+1]; LACP_LOCK_ASSERT(lsc); @@ -672,6 +692,7 @@ lacp_disable_distributing(struct lacp_po TAILQ_REMOVE(&la->la_ports, lp, lp_dist_q); la->la_nports--; + sc->sc_active = la->la_nports; if (lsc->lsc_active_aggregator == la) { lacp_suppress_distributing(lsc, la); @@ -688,6 +709,7 @@ 
lacp_enable_distributing(struct lacp_por { struct lacp_aggregator *la = lp->lp_aggregator; struct lacp_softc *lsc = lp->lp_lsc; + struct lagg_softc *sc = lsc->lsc_softc; char buf[LACP_LAGIDSTR_MAX+1]; LACP_LOCK_ASSERT(lsc); @@ -704,6 +726,7 @@ lacp_enable_distributing(struct lacp_por KASSERT(la->la_refcnt > la->la_nports, ("aggregator refcnt invalid")); TAILQ_INSERT_HEAD(&la->la_ports, lp, lp_dist_q); la->la_nports++; + sc->sc_active = la->la_nports; lp->lp_state |= LACP_STATE_DISTRIBUTING; @@ -908,7 +931,6 @@ lacp_aggregator_bandwidth(struct lacp_ag static void lacp_select_active_aggregator(struct lacp_softc *lsc) { - struct lagg_softc *sc = lsc->lsc_softc; struct lacp_aggregator *la; struct lacp_aggregator *best_la = NULL; uint64_t best_speed = 0; @@ -960,7 +982,6 @@ lacp_select_active_aggregator(struct lac lacp_format_lagid_aggregator(best_la, buf, sizeof(buf)))); if (lsc->lsc_active_aggregator != best_la) { - sc->sc_ifp->if_baudrate = best_speed; lsc->lsc_active_aggregator = best_la; lacp_update_portmap(lsc); if (best_la) { @@ -976,15 +997,18 @@ lacp_select_active_aggregator(struct lac static void lacp_update_portmap(struct lacp_softc *lsc) { + struct lagg_softc *sc = lsc->lsc_softc; struct lacp_aggregator *la; struct lacp_portmap *p; struct lacp_port *lp; + uint64_t speed; u_int newmap; int i; newmap = lsc->lsc_activemap == 0 ? 
1 : 0; p = &lsc->lsc_pmap[newmap]; la = lsc->lsc_active_aggregator; + speed = 0; bzero(p, sizeof(struct lacp_portmap)); if (la != NULL && la->la_nports > 0) { @@ -993,7 +1017,9 @@ lacp_update_portmap(struct lacp_softc *l TAILQ_FOREACH(lp, &la->la_ports, lp_dist_q) p->pm_map[i++] = lp; KASSERT(i == p->pm_count, ("Invalid port count")); + speed = lacp_aggregator_bandwidth(la); } + sc->sc_ifp->if_baudrate = speed; /* switch the active portmap over */ atomic_store_rel_int(&lsc->lsc_activemap, newmap); @@ -1264,6 +1290,8 @@ lacp_unselect(struct lacp_port *lp) static void lacp_sm_mux(struct lacp_port *lp) { + struct lagg_port *lgp = lp->lp_lagg; + struct lagg_softc *sc = lgp->lp_softc; enum lacp_mux_state new_state; boolean_t p_sync = (lp->lp_partner.lip_state & LACP_STATE_SYNC) != 0; @@ -1273,7 +1301,9 @@ lacp_sm_mux(struct lacp_port *lp) struct lacp_aggregator *la; if (lacp_debug > 1) - lacp_dprintf(lp, "%s: state %d\n", __func__, lp->lp_mux_state); + lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, " + "p_sync= 0x%x, p_collecting= 0x%x\n", __func__, + lp->lp_mux_state, selected, p_sync, p_collecting); re_eval: la = lp->lp_aggregator; @@ -1313,6 +1343,8 @@ re_eval: case LACP_MUX_DISTRIBUTING: if (selected != LACP_SELECTED || !p_sync || !p_collecting) { new_state = LACP_MUX_COLLECTING; + lacp_dprintf(lp, "Interface stopped DISTRIBUTING, possible flaping\n"); + sc->sc_flapping++; } break; default: @@ -1561,6 +1593,10 @@ lacp_sm_rx_record_pdu(struct lacp_port * sizeof(buf)))); } + /* XXX Hack, still need to implement 5.4.9 para 2,3,4 */ + if (lacp_strict) + lp->lp_partner.lip_state |= LACP_STATE_SYNC; + lacp_sm_ptx_update_timeout(lp, oldpstate); } @@ -1586,7 +1622,10 @@ lacp_sm_rx_record_default(struct lacp_po LACP_TRACE(lp); oldpstate = lp->lp_partner.lip_state; - lp->lp_partner = lacp_partner_admin; + if (lacp_strict) + lp->lp_partner = lacp_partner_admin_strict; + else + lp->lp_partner = lacp_partner_admin_optimistic;; lp->lp_state |= LACP_STATE_DEFAULTED; 
lacp_sm_ptx_update_timeout(lp, oldpstate); } @@ -1621,7 +1660,12 @@ lacp_sm_rx_update_default_selected(struc LACP_TRACE(lp); - lacp_sm_rx_update_selected_from_peerinfo(lp, &lacp_partner_admin); + if (lacp_strict) + lacp_sm_rx_update_selected_from_peerinfo(lp, + &lacp_partner_admin_strict); + else + lacp_sm_rx_update_selected_from_peerinfo(lp, + &lacp_partner_admin_optimistic); } /* transmit machine */ @@ -1629,7 +1673,7 @@ lacp_sm_rx_update_default_selected(struc static void lacp_sm_tx(struct lacp_port *lp) { - int error; + int error = 0; if (!(lp->lp_state & LACP_STATE_AGGREGATION) #if 1 @@ -1651,7 +1695,10 @@ lacp_sm_tx(struct lacp_port *lp) return; } - error = lacp_xmit_lacpdu(lp); + if (((1 << lp->lp_ifp->if_dunit) & lacp_tx_test) == 0) + error = lacp_xmit_lacpdu(lp); + else + LACP_TPRINTF((lp, "Dropping TX PDU\n")); if (error == 0) { lp->lp_flags &= ~LACP_PORT_NTT; Modified: head/sys/net/if_lagg.c ============================================================================== --- head/sys/net/if_lagg.c Sat Jul 13 01:24:16 2013 (r253313) +++ head/sys/net/if_lagg.c Sat Jul 13 04:25:03 2013 (r253314) @@ -122,6 +122,7 @@ static void lagg_media_status(struct ifn static struct lagg_port *lagg_link_active(struct lagg_softc *, struct lagg_port *); static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *); +static int lagg_sysctl_active(SYSCTL_HANDLER_ARGS); /* Simple round robin */ static int lagg_rr_attach(struct lagg_softc *); @@ -171,7 +172,7 @@ static const struct { }; SYSCTL_DECL(_net_link); -static SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, +SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); static int lagg_failover_rx_all = 0; /* Allow input on any failover links */ @@ -298,6 +299,12 @@ lagg_clone_create(struct if_clone *ifc, SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, "count", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_count, sc->sc_count, "Total number of ports"); + SYSCTL_ADD_PROC(&sc->ctx, SYSCTL_CHILDREN(oid), 
OID_AUTO, + "active", CTLTYPE_INT|CTLFLAG_RD, sc, 0, lagg_sysctl_active, + "I", "Total number of active ports"); + SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + "flapping", CTLTYPE_INT|CTLFLAG_RD, &sc->sc_flapping, + sc->sc_flapping, "Total number of port change events"); /* Hash all layers by default */ sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; @@ -1488,6 +1495,27 @@ lagg_gethdr(struct mbuf *m, u_int off, u return (mtod(m, char *) + off); } +static int +lagg_sysctl_active(SYSCTL_HANDLER_ARGS) +{ + struct lagg_softc *sc = (struct lagg_softc *)arg1; + struct lagg_port *lp; + int error; + + /* LACP tracks active links automatically, the others do not */ + if (sc->sc_proto != LAGG_PROTO_LACP) { + sc->sc_active = 0; + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) + sc->sc_active += LAGG_PORTACTIVE(lp); + } + + error = sysctl_handle_int(oidp, &sc->sc_active, 0, req); + if ((error) || (req->newptr == NULL)) + return (error); + + return (0); +} + uint32_t lagg_hashmbuf(struct lagg_softc *sc, struct mbuf *m, uint32_t key) { Modified: head/sys/net/if_lagg.h ============================================================================== --- head/sys/net/if_lagg.h Sat Jul 13 01:24:16 2013 (r253313) +++ head/sys/net/if_lagg.h Sat Jul 13 04:25:03 2013 (r253314) @@ -190,6 +190,9 @@ struct lagg_softc { struct rwlock sc_mtx; int sc_proto; /* lagg protocol */ u_int sc_count; /* number of ports */ + u_int sc_active; /* active port count */ + u_int sc_flapping; /* number of flapping + * events */ struct lagg_port *sc_primary; /* primary port */ struct ifmedia sc_media; /* media config */ caddr_t sc_psc; /* protocol data */ @@ -266,6 +269,8 @@ extern void (*lagg_linkstate_p)(struct i int lagg_enqueue(struct ifnet *, struct mbuf *); uint32_t lagg_hashmbuf(struct lagg_softc *, struct mbuf *, uint32_t); +SYSCTL_DECL(_net_link_lagg); + #endif /* _KERNEL */ #endif /* _NET_LAGG_H */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201307130425.r6D4P31J078644>