Date: Sat, 12 Sep 2015 20:36:40 +0000 (UTC) From: Hiroki Sato <hrs@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r287723 - in stable/10: sbin/ifconfig share/man/man4 sys/net Message-ID: <201509122036.t8CKae5m036054@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: hrs Date: Sat Sep 12 20:36:39 2015 New Revision: 287723 URL: https://svnweb.freebsd.org/changeset/base/287723 Log: MFC 272159,272161,272386,272446,272547,272548,273210: - Make lagg protos a enum. - When reconfiguring protocol on a lagg, first set it to LAGG_PROTO_NONE, then drop lock, run the attach routines, and then set it to specific proto. This removes tons of WITNESS warnings. - Make lagg protocol attach handlers not failing and allocate memory with M_WAITOK. - Virtualize lagg(4) cloner. This change fixes a panic when tearing down if_lagg(4) interfaces which were cloned in a vnet jail. Sysctl nodes which are dynamically generated for each cloned interface (net.link.lagg.N.*) have been removed, and use_flowid and flowid_shift ifconfig(8) parameters have been added instead. Flags and per-interface statistics counters are displayed in "ifconfig -v". - Separate option handling from SIOC[SG]LAGG to SIOC[SG]LAGGOPTS for backward compatibility with old ifconfig(8). - Move L2 addr configuration for the primary port to a taskqueue. This fixes LOR of softc rmlock in iflladdr_event handlers. - Call if_delmulti_ifma() after LACP_UNLOCK(). This fixes another LOR. - Fix a panic in lacp_transit_expire(). - Fix a panic in lagg_input() upon shutting down a port. - Use printb() for boolean flags in ro_opts and actor_state for LACP. - Fix lladdr configuration which could prevent LACP mode from working. - Fix LORs when a laggport interface has an IPv6 LLA. Modified: stable/10/sbin/ifconfig/ifconfig.8 stable/10/sbin/ifconfig/iflagg.c stable/10/share/man/man4/lagg.4 stable/10/sys/net/ieee8023ad_lacp.c stable/10/sys/net/ieee8023ad_lacp.h stable/10/sys/net/if_lagg.c stable/10/sys/net/if_lagg.h Directory Properties: stable/10/ (props changed) Modified: stable/10/sbin/ifconfig/ifconfig.8 ============================================================================== --- stable/10/sbin/ifconfig/ifconfig.8 Sat Sep 12 20:14:54 2015 (r287722) +++ stable/10/sbin/ifconfig/ifconfig.8 Sat Sep 12 20:36:39 2015 (r287723) @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd July 24, 2015 +.Dd September 12, 2015 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2393,9 +2393,16 @@ Remove the interface named by from the aggregation interface. .It Cm laggproto Ar proto Set the aggregation protocol. -The default is failover. -The available options are failover, fec, lacp, loadbalance, roundrobin and -none. +The default is +.Li failover . +The available options are +.Li failover , +.Li lacp , +.Li loadbalance , +.Li roundrobin , +.Li broadcast +and +.Li none . .It Cm lagghash Ar option Ns Oo , Ns Ar option Oc Set the packet layers to hash for aggregation protocols which load balance. The default is @@ -2410,7 +2417,34 @@ src/dst address for IPv4 or IPv6. .It Cm l4 src/dst port for TCP/UDP/SCTP. .El -.Pp +.It Cm use_flowid +Enable local hash computation for RSS hash on the interface. +The +.Li loadbalance +and +.Li lacp +modes will use the RSS hash from the network card if available +to avoid computing one, this may give poor traffic distribution +if the hash is invalid or uses less of the protocol header information. +.Cm use_flowid +disables use of RSS hash from the network card. +The default value can be set via the +.Va net.link.lagg.default_use_flowid +.Xr sysctl 8 +variable. +.Li 0 +means +.Dq disabled +and +.Li 1 +means +.Dq enabled . +.It Cm -use_flowid +Disable local hash computation for RSS hash on the interface. +.It Cm flowid_shift Ar number +Set a shift parameter for RSS local hash computation. +Hash is calculated by using flowid bits in a packet header mbuf +which are shifted by the number of this parameter. .El .Pp The following parameters are specific to IP tunnel interfaces, Modified: stable/10/sbin/ifconfig/iflagg.c ============================================================================== --- stable/10/sbin/ifconfig/iflagg.c Sat Sep 12 20:14:54 2015 (r287722) +++ stable/10/sbin/ifconfig/iflagg.c Sat Sep 12 20:36:39 2015 (r287723) @@ -17,6 +17,7 @@ static const char rcsid[] = #include <net/ethernet.h> #include <net/if.h> #include <net/if_lagg.h> +#include <net/ieee8023ad_lacp.h> #include <net/route.h> #include <ctype.h> @@ -68,7 +69,7 @@ setlaggproto(const char *val, int d, int bzero(&ra, sizeof(ra)); ra.ra_proto = LAGG_PROTO_MAX; - for (i = 0; i < (sizeof(lpr) / sizeof(lpr[0])); i++) { + for (i = 0; i < nitems(lpr); i++) { if (strcmp(val, lpr[i].lpr_name) == 0) { ra.ra_proto = lpr[i].lpr_proto; break; @@ -83,6 +84,48 @@ setlaggproto(const char *val, int d, int } static void +setlaggflowidshift(const char *val, int d, int s, const struct afswtch *afp) +{ + struct lagg_reqopts ro; + + bzero(&ro, sizeof(ro)); + ro.ro_opts = LAGG_OPT_FLOWIDSHIFT; + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); + ro.ro_flowid_shift = (int)strtol(val, NULL, 10); + if (ro.ro_flowid_shift & ~LAGG_OPT_FLOWIDSHIFT_MASK) + errx(1, "Invalid flowid_shift option: %s", val); + + if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) + err(1, "SIOCSLAGGOPTS"); +} + +static void +setlaggsetopt(const char *val, int d, int s, const struct afswtch *afp) +{ + struct lagg_reqopts ro; + + bzero(&ro, sizeof(ro)); + ro.ro_opts = d; + switch (ro.ro_opts) { + case LAGG_OPT_USE_FLOWID: + case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_LACP_STRICT: + case -LAGG_OPT_LACP_STRICT: + case LAGG_OPT_LACP_TXTEST: + case -LAGG_OPT_LACP_TXTEST: + case LAGG_OPT_LACP_RXTEST: + case -LAGG_OPT_LACP_RXTEST: + break; + default: + err(1, "Invalid lagg option"); + } + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); + + if (ioctl(s, SIOCSLAGGOPTS, &ro) != 0) + err(1, "SIOCSLAGGOPTS"); +} + +static void setlagghash(const char *val, int d, int s, const struct afswtch *afp) { struct lagg_reqflags rf; @@ -144,6 +187,7 @@ lagg_status(int s) struct lagg_protos lpr[] = LAGG_PROTOS; struct lagg_reqport rp, rpbuf[LAGG_MAX_PORTS]; struct lagg_reqall ra; + struct lagg_reqopts ro; struct lagg_reqflags rf; struct lacp_opreq *lp; const char *proto = "<unknown>"; @@ -151,6 +195,7 @@ lagg_status(int s) bzero(&rp, sizeof(rp)); bzero(&ra, sizeof(ra)); + bzero(&ro, sizeof(ro)); strlcpy(rp.rp_ifname, name, sizeof(rp.rp_ifname)); strlcpy(rp.rp_portname, name, sizeof(rp.rp_portname)); @@ -162,6 +207,9 @@ lagg_status(int s) ra.ra_size = sizeof(rpbuf); ra.ra_port = rpbuf; + strlcpy(ro.ro_ifname, name, sizeof(ro.ro_ifname)); + ioctl(s, SIOCGLAGGOPTS, &ro); + strlcpy(rf.rf_ifname, name, sizeof(rf.rf_ifname)); if (ioctl(s, SIOCGLAGGFLAGS, &rf) != 0) rf.rf_flags = 0; @@ -169,7 +217,7 @@ lagg_status(int s) if (ioctl(s, SIOCGLAGG, &ra) == 0) { lp = (struct lacp_opreq *)&ra.ra_lacpreq; - for (i = 0; i < (sizeof(lpr) / sizeof(lpr[0])); i++) { + for (i = 0; i < nitems(lpr); i++) { if (ra.ra_proto == lpr[i].lpr_proto) { proto = lpr[i].lpr_name; break; @@ -197,16 +245,27 @@ lagg_status(int s) if (isport) printf(" laggdev %s", rp.rp_ifname); putchar('\n'); - if (verbose && ra.ra_proto == LAGG_PROTO_LACP) - printf("\tlag id: %s\n", - lacp_format_peer(lp, "\n\t\t ")); + if (verbose) { + printf("\tlagg options:\n"); + printb("\t\tflags", ro.ro_opts, LAGG_OPT_BITS); + putchar('\n'); + printf("\t\tflowid_shift: %d\n", ro.ro_flowid_shift); + printf("\tlagg statistics:\n"); + printf("\t\tactive ports: %d\n", ro.ro_active); + printf("\t\tflapping: %u\n", ro.ro_flapping); + if (ra.ra_proto == LAGG_PROTO_LACP) { + printf("\tlag id: %s\n", + lacp_format_peer(lp, "\n\t\t ")); + } + } for (i = 0; i < ra.ra_ports; i++) { lp = (struct lacp_opreq *)&rpbuf[i].rp_lacpreq; printf("\tlaggport: %s ", rpbuf[i].rp_portname); printb("flags", rpbuf[i].rp_flags, LAGG_PORT_BITS); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) - printf(" state=%X", lp->actor_state); + printb(" state", lp->actor_state, + LACP_STATE_BITS); putchar('\n'); if (verbose && ra.ra_proto == LAGG_PROTO_LACP) printf("\t\t%s\n", @@ -226,6 +285,15 @@ static struct cmd lagg_cmds[] = { DEF_CMD_ARG("-laggport", unsetlaggport), DEF_CMD_ARG("laggproto", setlaggproto), DEF_CMD_ARG("lagghash", setlagghash), + DEF_CMD("use_flowid", LAGG_OPT_USE_FLOWID, setlaggsetopt), + DEF_CMD("-use_flowid", -LAGG_OPT_USE_FLOWID, setlaggsetopt), + DEF_CMD("lacp_strict", LAGG_OPT_LACP_STRICT, setlaggsetopt), + DEF_CMD("-lacp_strict", -LAGG_OPT_LACP_STRICT, setlaggsetopt), + DEF_CMD("lacp_txtest", LAGG_OPT_LACP_TXTEST, setlaggsetopt), + DEF_CMD("-lacp_txtest", -LAGG_OPT_LACP_TXTEST, setlaggsetopt), + DEF_CMD("lacp_rxtest", LAGG_OPT_LACP_RXTEST, setlaggsetopt), + DEF_CMD("-lacp_rxtest", -LAGG_OPT_LACP_RXTEST, setlaggsetopt), + DEF_CMD_ARG("flowid_shift", setlaggflowidshift), }; static struct afswtch af_lagg = { .af_name = "af_lagg", Modified: stable/10/share/man/man4/lagg.4 ============================================================================== --- stable/10/share/man/man4/lagg.4 Sat Sep 12 20:14:54 2015 (r287722) +++ stable/10/share/man/man4/lagg.4 Sat Sep 12 20:36:39 2015 (r287723) @@ -16,7 +16,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 23, 2012 +.Dd October 1, 2014 .Dt LAGG 4 .Os .Sh NAME @@ -143,9 +143,9 @@ modes will use the RSS hash from the net computing one, this may give poor traffic distribution if the hash is invalid or uses less of the protocol header information. Local hash computation can be forced per interface by setting the -.Va net.link.lagg.X.use_flowid -.Xr sysctl 8 -variable to zero where X is the interface number. +.Cm use_flowid +.Xr ifconfig 8 +flag. The default for new interfaces is set via the .Va net.link.lagg.default_use_flowid .Xr sysctl 8 . Modified: stable/10/sys/net/ieee8023ad_lacp.c ============================================================================== --- stable/10/sys/net/ieee8023ad_lacp.c Sat Sep 12 20:14:54 2015 (r287722) +++ stable/10/sys/net/ieee8023ad_lacp.c Sat Sep 12 20:36:39 2015 (r287723) @@ -187,15 +187,15 @@ static const char *lacp_format_portid(co static void lacp_dprintf(const struct lacp_port *, const char *, ...) __attribute__((__format__(__printf__, 2, 3))); -static int lacp_debug = 0; +static VNET_DEFINE(int, lacp_debug); +#define V_lacp_debug VNET(lacp_debug) SYSCTL_NODE(_net_link_lagg, OID_AUTO, lacp, CTLFLAG_RD, 0, "ieee802.3ad"); -SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RW | CTLFLAG_TUN, - &lacp_debug, 0, "Enable LACP debug logging (1=debug, 2=trace)"); -TUNABLE_INT("net.link.lagg.lacp.debug", &lacp_debug); - -#define LACP_DPRINTF(a) if (lacp_debug & 0x01) { lacp_dprintf a ; } -#define LACP_TRACE(a) if (lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } -#define LACP_TPRINTF(a) if (lacp_debug & 0x04) { lacp_dprintf a ; } +SYSCTL_INT(_net_link_lagg_lacp, OID_AUTO, debug, CTLFLAG_RWTUN | CTLFLAG_VNET, + &VNET_NAME(lacp_debug), 0, "Enable LACP debug logging (1=debug, 2=trace)"); + +#define LACP_DPRINTF(a) if (V_lacp_debug & 0x01) { lacp_dprintf a ; } +#define LACP_TRACE(a) if (V_lacp_debug & 0x02) { lacp_dprintf(a,"%s\n",__func__); } +#define LACP_TPRINTF(a) if (V_lacp_debug & 0x04) { lacp_dprintf a ; } /* * partner administration variables. @@ -298,7 +298,7 @@ lacp_pdu_input(struct lacp_port *lp, str goto bad; } - if (lacp_debug > 0) { + if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu receive\n"); lacp_dump_lacpdu(du); } @@ -383,7 +383,7 @@ lacp_xmit_lacpdu(struct lacp_port *lp) sizeof(du->ldu_collector)); du->ldu_collector.lci_maxdelay = 0; - if (lacp_debug > 0) { + if (V_lacp_debug > 0) { lacp_dprintf(lp, "lacpdu transmit\n"); lacp_dump_lacpdu(du); } @@ -495,12 +495,14 @@ lacp_tick(void *arg) if ((lp->lp_state & LACP_STATE_AGGREGATION) == 0) continue; + CURVNET_SET(lp->lp_ifp->if_vnet); lacp_run_timers(lp); lacp_select(lp); lacp_sm_mux(lp); lacp_sm_tx(lp); lacp_sm_ptx_tx_schedule(lp); + CURVNET_RESTORE(); } callout_reset(&lsc->lsc_callout, hz, lacp_tick, lsc); } @@ -577,12 +579,13 @@ lacp_port_destroy(struct lagg_port *lgp) lacp_disable_distributing(lp); lacp_unselect(lp); + LIST_REMOVE(lp, lp_next); + LACP_UNLOCK(lsc); + /* The address may have already been removed by if_purgemaddrs() */ if (!lgp->lp_detaching) if_delmulti_ifma(lp->lp_ifma); - LIST_REMOVE(lp, lp_next); - LACP_UNLOCK(lsc); free(lp, M_DEVBUF); } @@ -743,58 +746,19 @@ lacp_transit_expire(void *vp) LACP_LOCK_ASSERT(lsc); + CURVNET_SET(lsc->lsc_softc->sc_ifp->if_vnet); LACP_TRACE(NULL); + CURVNET_RESTORE(); lsc->lsc_suppress_distributing = FALSE; } -static void -lacp_attach_sysctl(struct lacp_softc *lsc, struct sysctl_oid *p_oid) -{ - struct lagg_softc *sc = lsc->lsc_softc; - - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(p_oid), OID_AUTO, - "lacp_strict_mode", - CTLFLAG_RW, - &lsc->lsc_strict_mode, - lsc->lsc_strict_mode, - "Enable LACP strict mode"); -} - -static void -lacp_attach_sysctl_debug(struct lacp_softc *lsc, struct sysctl_oid *p_oid) -{ - struct lagg_softc *sc = lsc->lsc_softc; - struct sysctl_oid *oid; - - /* Create a child of the parent lagg interface */ - oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(p_oid), - OID_AUTO, "debug", CTLFLAG_RD, NULL, "DEBUG"); - - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "rx_test", - CTLFLAG_RW, - &lsc->lsc_debug.lsc_rx_test, - lsc->lsc_debug.lsc_rx_test, - "Bitmap of if_dunit entries to drop RX frames for"); - SYSCTL_ADD_UINT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "tx_test", - CTLFLAG_RW, - &lsc->lsc_debug.lsc_tx_test, - lsc->lsc_debug.lsc_tx_test, - "Bitmap of if_dunit entries to drop TX frames for"); -} - -int +void lacp_attach(struct lagg_softc *sc) { struct lacp_softc *lsc; - struct sysctl_oid *oid; - lsc = malloc(sizeof(struct lacp_softc), - M_DEVBUF, M_NOWAIT|M_ZERO); - if (lsc == NULL) - return (ENOMEM); + lsc = malloc(sizeof(struct lacp_softc), M_DEVBUF, M_WAITOK | M_ZERO); sc->sc_psc = (caddr_t)lsc; lsc->lsc_softc = sc; @@ -806,35 +770,24 @@ lacp_attach(struct lagg_softc *sc) TAILQ_INIT(&lsc->lsc_aggregators); LIST_INIT(&lsc->lsc_ports); - /* Create a child of the parent lagg interface */ - oid = SYSCTL_ADD_NODE(&sc->ctx, SYSCTL_CHILDREN(sc->sc_oid), - OID_AUTO, "lacp", CTLFLAG_RD, NULL, "LACP"); - - /* Attach sysctl nodes */ - lacp_attach_sysctl(lsc, oid); - lacp_attach_sysctl_debug(lsc, oid); - callout_init_mtx(&lsc->lsc_transit_callout, &lsc->lsc_mtx, 0); callout_init_mtx(&lsc->lsc_callout, &lsc->lsc_mtx, 0); /* if the lagg is already up then do the same */ if (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) lacp_init(sc); - - return (0); } int -lacp_detach(struct lagg_softc *sc) +lacp_detach(void *psc) { - struct lacp_softc *lsc = LACP_SOFTC(sc); + struct lacp_softc *lsc = (struct lacp_softc *)psc; KASSERT(TAILQ_EMPTY(&lsc->lsc_aggregators), ("aggregators still active")); KASSERT(lsc->lsc_active_aggregator == NULL, ("aggregator still attached")); - sc->sc_psc = NULL; callout_drain(&lsc->lsc_transit_callout); callout_drain(&lsc->lsc_callout); @@ -883,7 +836,7 @@ lacp_select_tx_port(struct lagg_softc *s return (NULL); } - if (sc->use_flowid && + if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) hash = m->m_pkthdr.flowid >> sc->flowid_shift; else @@ -1425,7 +1378,7 @@ lacp_sm_mux(struct lacp_port *lp) enum lacp_selected selected = lp->lp_selected; struct lacp_aggregator *la; - if (lacp_debug > 1) + if (V_lacp_debug > 1) lacp_dprintf(lp, "%s: state= 0x%x, selected= 0x%x, " "p_sync= 0x%x, p_collecting= 0x%x\n", __func__, lp->lp_mux_state, selected, p_sync, p_collecting); Modified: stable/10/sys/net/ieee8023ad_lacp.h ============================================================================== --- stable/10/sys/net/ieee8023ad_lacp.h Sat Sep 12 20:14:54 2015 (r287722) +++ stable/10/sys/net/ieee8023ad_lacp.h Sat Sep 12 20:36:39 2015 (r287723) @@ -75,6 +75,7 @@ "\007DEFAULTED" \ "\010EXPIRED" +#ifdef _KERNEL /* * IEEE802.3 slow protocols * @@ -282,8 +283,8 @@ struct lacp_softc { struct mbuf *lacp_input(struct lagg_port *, struct mbuf *); struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *); -int lacp_attach(struct lagg_softc *); -int lacp_detach(struct lagg_softc *); +void lacp_attach(struct lagg_softc *); +int lacp_detach(void *); void lacp_init(struct lagg_softc *); void lacp_stop(struct lagg_softc *); int lacp_port_create(struct lagg_port *); @@ -336,3 +337,4 @@ lacp_isdistributing(struct lagg_port *lg #define LACP_LAGIDSTR_MAX \ (1 + LACP_PARTNERSTR_MAX + 1 + LACP_PARTNERSTR_MAX + 1) #define LACP_STATESTR_MAX (255) /* XXX */ +#endif /* _KERNEL */ Modified: stable/10/sys/net/if_lagg.c ============================================================================== --- stable/10/sys/net/if_lagg.c Sat Sep 12 20:14:54 2015 (r287722) +++ stable/10/sys/net/if_lagg.c Sat Sep 12 20:36:39 2015 (r287723) @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <net/if_types.h> #include <net/if_var.h> #include <net/bpf.h> +#include <net/vnet.h> #if defined(INET) || defined(INET6) #include <netinet/in.h> @@ -81,13 +82,21 @@ static struct { {0, NULL} }; -SLIST_HEAD(__trhead, lagg_softc) lagg_list; /* list of laggs */ -static struct mtx lagg_list_mtx; +VNET_DEFINE(SLIST_HEAD(__trhead, lagg_softc), lagg_list); /* list of laggs */ +#define V_lagg_list VNET(lagg_list) +static VNET_DEFINE(struct mtx, lagg_list_mtx); +#define V_lagg_list_mtx VNET(lagg_list_mtx) +#define LAGG_LIST_LOCK_INIT(x) mtx_init(&V_lagg_list_mtx, \ + "if_lagg list", NULL, MTX_DEF) +#define LAGG_LIST_LOCK_DESTROY(x) mtx_destroy(&V_lagg_list_mtx) +#define LAGG_LIST_LOCK(x) mtx_lock(&V_lagg_list_mtx) +#define LAGG_LIST_UNLOCK(x) mtx_unlock(&V_lagg_list_mtx) eventhandler_tag lagg_detach_cookie = NULL; static int lagg_clone_create(struct if_clone *, int, caddr_t); static void lagg_clone_destroy(struct ifnet *); -static struct if_clone *lagg_cloner; +static VNET_DEFINE(struct if_clone *, lagg_cloner); +#define V_lagg_cloner VNET(lagg_cloner) static const char laggname[] = "lagg"; static void lagg_lladdr(struct lagg_softc *, uint8_t *); @@ -122,24 +131,23 @@ static void lagg_media_status(struct ifn static struct lagg_port *lagg_link_active(struct lagg_softc *, struct lagg_port *); static const void *lagg_gethdr(struct mbuf *, u_int, u_int, void *); -static int lagg_sysctl_active(SYSCTL_HANDLER_ARGS); /* Simple round robin */ -static int lagg_rr_attach(struct lagg_softc *); +static void lagg_rr_attach(struct lagg_softc *); static int lagg_rr_detach(struct lagg_softc *); static int lagg_rr_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_rr_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Active failover */ -static int lagg_fail_attach(struct lagg_softc *); +static void lagg_fail_attach(struct lagg_softc *); static int lagg_fail_detach(struct lagg_softc *); static int lagg_fail_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_fail_input(struct lagg_softc *, struct lagg_port *, struct mbuf *); /* Loadbalancing */ -static int lagg_lb_attach(struct lagg_softc *); +static void lagg_lb_attach(struct lagg_softc *); static int lagg_lb_detach(struct lagg_softc *); static int lagg_lb_port_create(struct lagg_port *); static void lagg_lb_port_destroy(struct lagg_port *); @@ -149,7 +157,7 @@ static struct mbuf *lagg_lb_input(struct static int lagg_lb_porttable(struct lagg_softc *, struct lagg_port *); /* 802.3ad LACP */ -static int lagg_lacp_attach(struct lagg_softc *); +static void lagg_lacp_attach(struct lagg_softc *); static int lagg_lacp_detach(struct lagg_softc *); static int lagg_lacp_start(struct lagg_softc *, struct mbuf *); static struct mbuf *lagg_lacp_input(struct lagg_softc *, struct lagg_port *, @@ -159,9 +167,9 @@ static void lagg_lacp_lladdr(struct lagg static void lagg_callout(void *); /* lagg protocol table */ -static const struct { - int ti_proto; - int (*ti_attach)(struct lagg_softc *); +static const struct lagg_proto { + lagg_proto ti_proto; + void (*ti_attach)(struct lagg_softc *); } lagg_protos[] = { { LAGG_PROTO_ROUNDROBIN, lagg_rr_attach }, { LAGG_PROTO_FAILOVER, lagg_fail_attach }, @@ -175,31 +183,55 @@ SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, OID_AUTO, lagg, CTLFLAG_RW, 0, "Link Aggregation"); -static int lagg_failover_rx_all = 0; /* Allow input on any failover links */ -SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW, - &lagg_failover_rx_all, 0, +/* Allow input on any failover links */ +static VNET_DEFINE(int, lagg_failover_rx_all); +#define V_lagg_failover_rx_all VNET(lagg_failover_rx_all) +SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(lagg_failover_rx_all), 0, "Accept input from any interface in a failover lagg"); -static int def_use_flowid = 1; /* Default value for using flowid */ -TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid); -SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW, - &def_use_flowid, 0, + +/* Default value for using M_FLOWID */ +static VNET_DEFINE(int, def_use_flowid) = 1; +#define V_def_use_flowid VNET(def_use_flowid) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RWTUN, + &VNET_NAME(def_use_flowid), 0, "Default setting for using flow id for load sharing"); -static int def_flowid_shift = 16; /* Default value for using flow shift */ -TUNABLE_INT("net.link.lagg.default_flowid_shift", &def_flowid_shift); -SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RW, - &def_flowid_shift, 0, + +/* Default value for using M_FLOWID */ +static VNET_DEFINE(int, def_flowid_shift) = 16; +#define V_def_flowid_shift VNET(def_flowid_shift) +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_flowid_shift, CTLFLAG_RWTUN, + &VNET_NAME(def_flowid_shift), 0, "Default setting for flowid shift for load sharing"); +static void +vnet_lagg_init(const void *unused __unused) +{ + + LAGG_LIST_LOCK_INIT(); + SLIST_INIT(&V_lagg_list); + V_lagg_cloner = if_clone_simple(laggname, lagg_clone_create, + lagg_clone_destroy, 0); +} +VNET_SYSINIT(vnet_lagg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_lagg_init, NULL); + +static void +vnet_lagg_uninit(const void *unused __unused) +{ + + if_clone_detach(V_lagg_cloner); + LAGG_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_lagg_uninit, NULL); + static int lagg_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: - mtx_init(&lagg_list_mtx, "if_lagg list", NULL, MTX_DEF); - SLIST_INIT(&lagg_list); - lagg_cloner = if_clone_simple(laggname, lagg_clone_create, - lagg_clone_destroy, 0); lagg_input_p = lagg_input; lagg_linkstate_p = lagg_port_state; lagg_detach_cookie = EVENTHANDLER_REGISTER( @@ -209,10 +241,8 @@ lagg_modevent(module_t mod, int type, vo case MOD_UNLOAD: EVENTHANDLER_DEREGISTER(ifnet_departure_event, lagg_detach_cookie); - if_clone_detach(lagg_cloner); lagg_input_p = NULL; lagg_linkstate_p = NULL; - mtx_destroy(&lagg_list_mtx); break; default: return (EOPNOTSUPP); @@ -278,10 +308,8 @@ lagg_clone_create(struct if_clone *ifc, { struct lagg_softc *sc; struct ifnet *ifp; - int i, error = 0; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ - struct sysctl_oid *oid; - char num[14]; /* sufficient for 32 bits */ + int i; sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); @@ -295,29 +323,10 @@ lagg_clone_create(struct if_clone *ifc, sc->sc_ibytes = counter_u64_alloc(M_WAITOK); sc->sc_obytes = counter_u64_alloc(M_WAITOK); - sysctl_ctx_init(&sc->ctx); - snprintf(num, sizeof(num), "%u", unit); - sc->use_flowid = def_use_flowid; - sc->flowid_shift = def_flowid_shift; - sc->sc_oid = oid = SYSCTL_ADD_NODE(&sc->ctx, - &SYSCTL_NODE_CHILDREN(_net_link, lagg), - OID_AUTO, num, CTLFLAG_RD, NULL, ""); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "use_flowid", CTLFLAG_RW, &sc->use_flowid, - sc->use_flowid, "Use flow id for load sharing"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "flowid_shift", CTLFLAG_RW, &sc->flowid_shift, - sc->flowid_shift, - "Shift flowid bits to prevent multiqueue collisions"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "count", CTLFLAG_RD, &sc->sc_count, sc->sc_count, - "Total number of ports"); - SYSCTL_ADD_PROC(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "active", CTLTYPE_INT|CTLFLAG_RD, sc, 0, lagg_sysctl_active, - "I", "Total number of active ports"); - SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, - "flapping", CTLFLAG_RD, &sc->sc_flapping, - sc->sc_flapping, "Total number of port change events"); + if (V_def_use_flowid) + sc->sc_opts |= LAGG_OPT_USE_FLOWID; + sc->flowid_shift = V_def_flowid_shift; + /* Hash all layers by default */ sc->sc_flags = LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4; @@ -325,11 +334,7 @@ lagg_clone_create(struct if_clone *ifc, for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) { sc->sc_proto = lagg_protos[i].ti_proto; - if ((error = lagg_protos[i].ti_attach(sc)) != 0) { - if_free(ifp); - free(sc, M_DEVBUF); - return (error); - } + lagg_protos[i].ti_attach(sc); break; } } @@ -371,9 +376,9 @@ lagg_clone_create(struct if_clone *ifc, lagg_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); /* Insert into the global list of laggs */ - mtx_lock(&lagg_list_mtx); - SLIST_INSERT_HEAD(&lagg_list, sc, sc_entries); - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_LOCK(); + SLIST_INSERT_HEAD(&V_lagg_list, sc, sc_entries); + LAGG_LIST_UNLOCK(); callout_reset(&sc->sc_callout, hz, lagg_callout, sc); @@ -400,10 +405,9 @@ lagg_clone_destroy(struct ifnet *ifp) /* Unhook the aggregation protocol */ if (sc->sc_detach != NULL) (*sc->sc_detach)(sc); + else + LAGG_WUNLOCK(sc); - LAGG_WUNLOCK(sc); - - sysctl_ctx_free(&sc->ctx); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free(ifp); @@ -417,9 +421,9 @@ lagg_clone_destroy(struct ifnet *ifp) counter_u64_free(sc->sc_ibytes); counter_u64_free(sc->sc_obytes); - mtx_lock(&lagg_list_mtx); - SLIST_REMOVE(&lagg_list, sc, lagg_softc, sc_entries); - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_LOCK(); + SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries); + LAGG_LIST_UNLOCK(); taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task); LAGG_LOCK_DESTROY(sc); @@ -431,15 +435,28 @@ static void lagg_lladdr(struct lagg_softc *sc, uint8_t *lladdr) { struct ifnet *ifp = sc->sc_ifp; + struct lagg_port lp; if (memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) return; + LAGG_WLOCK_ASSERT(sc); + /* + * Set the link layer address on the lagg interface. + * sc_lladdr() notifies the MAC change to + * the aggregation protocol. iflladdr_event handler which + * may trigger gratuitous ARPs for INET will be handled in + * a taskqueue. + */ bcopy(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN); - /* Let the protocol know the MAC has changed */ if (sc->sc_lladdr != NULL) (*sc->sc_lladdr)(sc); - EVENTHANDLER_INVOKE(iflladdr_event, ifp); + + bzero(&lp, sizeof(lp)); + lp.lp_ifp = sc->sc_ifp; + lp.lp_softc = sc; + + lagg_port_lladdr(&lp, lladdr); } static void @@ -487,11 +504,13 @@ lagg_port_lladdr(struct lagg_port *lp, u struct ifnet *ifp = lp->lp_ifp; struct lagg_llq *llq; int pending = 0; + int primary; LAGG_WLOCK_ASSERT(sc); - if (lp->lp_detaching || - memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0) + primary = (sc->sc_primary->lp_ifp == ifp) ? 1 : 0; + if (primary == 0 && (lp->lp_detaching || + memcmp(lladdr, IF_LLADDR(ifp), ETHER_ADDR_LEN) == 0)) return; /* Check to make sure its not already queued to be changed */ @@ -510,6 +529,7 @@ lagg_port_lladdr(struct lagg_port *lp, u /* Update the lladdr even if pending, it may have changed */ llq->llq_ifp = ifp; + llq->llq_primary = primary; bcopy(lladdr, llq->llq_lladdr, ETHER_ADDR_LEN); if (!pending) @@ -542,14 +562,20 @@ lagg_port_setlladdr(void *arg, int pendi for (llq = head; llq != NULL; llq = head) { ifp = llq->llq_ifp; - /* Set the link layer address */ CURVNET_SET(ifp->if_vnet); - error = if_setlladdr(ifp, llq->llq_lladdr, ETHER_ADDR_LEN); + if (llq->llq_primary == 0) { + /* + * Set the link layer address on the laggport interface. + * if_setlladdr() triggers gratuitous ARPs for INET. + */ + error = if_setlladdr(ifp, llq->llq_lladdr, + ETHER_ADDR_LEN); + if (error) + printf("%s: setlladdr failed on %s\n", __func__, + ifp->if_xname); + } else + EVENTHANDLER_INVOKE(iflladdr_event, ifp); CURVNET_RESTORE(); - if (error) - printf("%s: setlladdr failed on %s\n", __func__, - ifp->if_xname); - head = SLIST_NEXT(llq, llq_entries); free(llq, M_DEVBUF); } @@ -581,34 +607,6 @@ lagg_port_create(struct lagg_softc *sc, if (ifp->if_type != IFT_ETHER) return (EPROTONOSUPPORT); -#ifdef INET6 - /* - * The member interface should not have inet6 address because - * two interfaces with a valid link-local scope zone must not be - * merged in any form. This restriction is needed to - * prevent violation of link-local scope zone. Attempts to - * add a member interface which has inet6 addresses triggers - * removal of all inet6 addresses on the member interface. - */ - SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { - if (in6ifa_llaonifp(lp->lp_ifp)) { - in6_ifdetach(lp->lp_ifp); - if_printf(sc->sc_ifp, - "IPv6 addresses on %s have been removed " - "before adding it as a member to prevent " - "IPv6 address scope violation.\n", - lp->lp_ifp->if_xname); - } - } - if (in6ifa_llaonifp(ifp)) { - in6_ifdetach(ifp); - if_printf(sc->sc_ifp, - "IPv6 addresses on %s have been removed " - "before adding it as a member to prevent " - "IPv6 address scope violation.\n", - ifp->if_xname); - } -#endif /* Allow the first Ethernet member to define the MTU */ if (SLIST_EMPTY(&sc->sc_ports)) sc->sc_ifp->if_mtu = ifp->if_mtu; @@ -623,10 +621,10 @@ lagg_port_create(struct lagg_softc *sc, return (ENOMEM); /* Check if port is a stacked lagg */ - mtx_lock(&lagg_list_mtx); - SLIST_FOREACH(sc_ptr, &lagg_list, sc_entries) { + LAGG_LIST_LOCK(); + SLIST_FOREACH(sc_ptr, &V_lagg_list, sc_entries) { if (ifp == sc_ptr->sc_ifp) { - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (EINVAL); /* XXX disable stacking for the moment, its untested */ @@ -634,14 +632,14 @@ lagg_port_create(struct lagg_softc *sc, lp->lp_flags |= LAGG_PORT_STACK; if (lagg_port_checkstacking(sc_ptr) >= LAGG_MAX_STACKING) { - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); return (E2BIG); } #endif } } - mtx_unlock(&lagg_list_mtx); + LAGG_LIST_UNLOCK(); /* Change the interface type */ lp->lp_iftype = ifp->if_type; @@ -991,10 +989,12 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd { struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; struct lagg_reqall *ra = (struct lagg_reqall *)data; + struct lagg_reqopts *ro = (struct lagg_reqopts *)data; struct lagg_reqport *rp = (struct lagg_reqport *)data, rpbuf; struct lagg_reqflags *rf = (struct lagg_reqflags *)data; struct ifreq *ifr = (struct ifreq *)data; struct lagg_port *lp; + const struct lagg_proto *proto = NULL; struct ifnet *tpif; struct thread *td = curthread; char *buf, *outbuf; @@ -1042,50 +1042,136 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd error = priv_check(td, PRIV_NET_LAGG); if (error) break; - if (ra->ra_proto >= LAGG_PROTO_MAX) { + for (proto = lagg_protos; proto->ti_proto != LAGG_PROTO_NONE; + proto++) { + if (proto->ti_proto == ra->ra_proto) { + if (sc->sc_ifflags & IFF_DEBUG) + printf("%s: using proto %u\n", + sc->sc_ifname, proto->ti_proto); + break; + } + } + if (proto->ti_proto == LAGG_PROTO_NONE) { error = EPROTONOSUPPORT; break; } + /* Set to LAGG_PROTO_NONE during the attach. */ LAGG_WLOCK(sc); if (sc->sc_proto != LAGG_PROTO_NONE) { - /* Reset protocol first in case detach unlocks */ sc->sc_proto = LAGG_PROTO_NONE; - error = sc->sc_detach(sc); - sc->sc_detach = NULL; - sc->sc_start = NULL; - sc->sc_input = NULL; - sc->sc_port_create = NULL; - sc->sc_port_destroy = NULL; - sc->sc_linkstate = NULL; - sc->sc_init = NULL; - sc->sc_stop = NULL; - sc->sc_lladdr = NULL; - sc->sc_req = NULL; - sc->sc_portreq = NULL; - } else if (sc->sc_input != NULL) { - /* Still detaching */ - error = EBUSY; + if (sc->sc_detach != NULL) + sc->sc_detach(sc); + else + LAGG_WUNLOCK(sc); } - if (error != 0) { - LAGG_WUNLOCK(sc); + proto->ti_attach(sc); + LAGG_WLOCK(sc); + sc->sc_proto = proto->ti_proto; + LAGG_WUNLOCK(sc); + break; + case SIOCGLAGGOPTS: + ro->ro_opts = sc->sc_opts; + if (sc->sc_proto == LAGG_PROTO_LACP) { + struct lacp_softc *lsc; + + lsc = (struct lacp_softc *)sc->sc_psc; + if (lsc->lsc_debug.lsc_tx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_TXTEST; + if (lsc->lsc_debug.lsc_rx_test != 0) + ro->ro_opts |= LAGG_OPT_LACP_RXTEST; + if (lsc->lsc_strict_mode != 0) + ro->ro_opts |= LAGG_OPT_LACP_STRICT; + + ro->ro_active = sc->sc_active; + } else { + ro->ro_active = 0; + SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) + ro->ro_active += LAGG_PORTACTIVE(lp); + } + ro->ro_flapping = sc->sc_flapping; + ro->ro_flowid_shift = sc->flowid_shift; + break; + case SIOCSLAGGOPTS: + error = priv_check(td, PRIV_NET_LAGG); + if (error) + break; + if (ro->ro_opts == 0) + break; + /* + * Set options. LACP options are stored in sc->sc_psc, + * not in sc_opts. + */ + int valid, lacp; + + switch (ro->ro_opts) { + case LAGG_OPT_USE_FLOWID: + case -LAGG_OPT_USE_FLOWID: + case LAGG_OPT_FLOWIDSHIFT: + valid = 1; + lacp = 0; + break; + case LAGG_OPT_LACP_TXTEST: + case -LAGG_OPT_LACP_TXTEST: + case LAGG_OPT_LACP_RXTEST: + case -LAGG_OPT_LACP_RXTEST: + case LAGG_OPT_LACP_STRICT: + case -LAGG_OPT_LACP_STRICT: + valid = lacp = 1; + break; + default: + valid = lacp = 0; break; } - for (int i = 0; i < (sizeof(lagg_protos) / - sizeof(lagg_protos[0])); i++) { - if (lagg_protos[i].ti_proto == ra->ra_proto) { - if (sc->sc_ifflags & IFF_DEBUG) - printf("%s: using proto %u\n", - sc->sc_ifname, - lagg_protos[i].ti_proto); - sc->sc_proto = lagg_protos[i].ti_proto; - if (sc->sc_proto != LAGG_PROTO_NONE) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201509122036.t8CKae5m036054>