Skip site navigation (1) Skip section navigation (2)
Date:      Thu, 22 Oct 2020 09:47:12 +0000 (UTC)
From:      Hans Petter Selasky <hselasky@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r366933 - in head: sbin/ifconfig share/man/man4 sys/net
Message-ID:  <202010220947.09M9lCYA051492@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: hselasky
Date: Thu Oct 22 09:47:12 2020
New Revision: 366933
URL: https://svnweb.freebsd.org/changeset/base/366933

Log:
  Add support for IP over infiniband, IPoIB, to lagg(4). Currently only
  the failover protocol is supported due to limitations in the IPoIB
  architecture. Refer to the lagg(4) manual page for how to configure
  and use this new feature. A new network interface type,
  IFT_INFINIBANDLAG, has been added, similar to the existing
  IFT_IEEE8023ADLAG.
  
  ifconfig(8) has been updated to accept a new laggtype argument when
  creating lagg(4) network interfaces. This new argument is used to
  distinguish between ethernet and infiniband types of lagg(4) network
  interface. The laggtype argument is optional and defaults to
  ethernet. The lagg(4) command line syntax is backwards compatible.
  
  Differential Revision:	https://reviews.freebsd.org/D26254
  Reviewed by:		melifaro@
  MFC after:		1 week
  Sponsored by:		Mellanox Technologies // NVIDIA Networking

Modified:
  head/sbin/ifconfig/ifconfig.8
  head/sbin/ifconfig/iflagg.c
  head/share/man/man4/lagg.4
  head/sys/net/ieee8023ad_lacp.c
  head/sys/net/if_ethersubr.c
  head/sys/net/if_infiniband.c
  head/sys/net/if_lagg.c
  head/sys/net/if_lagg.h
  head/sys/net/if_types.h

Modified: head/sbin/ifconfig/ifconfig.8
==============================================================================
--- head/sbin/ifconfig/ifconfig.8	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/sbin/ifconfig/ifconfig.8	Thu Oct 22 09:47:12 2020	(r366933)
@@ -28,7 +28,7 @@
 .\"     From: @(#)ifconfig.8	8.3 (Berkeley) 1/5/94
 .\" $FreeBSD$
 .\"
-.Dd September 17, 2020
+.Dd October 21, 2020
 .Dt IFCONFIG 8
 .Os
 .Sh NAME
@@ -2481,6 +2481,12 @@ Set to 0 to disable.
 .Pp
 The following parameters are specific to lagg interfaces:
 .Bl -tag -width indent
+.It Cm laggtype Ar type
+When creating a lagg interface the type can be specified as either
+.Cm ethernet
+or
+.Cm infiniband .
+If not specified, ethernet is the default lagg type.
 .It Cm laggport Ar interface
 Add the interface named by
 .Ar interface

Modified: head/sbin/ifconfig/iflagg.c
==============================================================================
--- head/sbin/ifconfig/iflagg.c	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/sbin/ifconfig/iflagg.c	Thu Oct 22 09:47:12 2020	(r366933)
@@ -30,8 +30,12 @@ static const char rcsid[] =
 
 #include "ifconfig.h"
 
-char lacpbuf[120];	/* LACP peer '[(a,a,a),(p,p,p)]' */
+static struct iflaggparam params = {
+	.lagg_type = LAGG_TYPE_DEFAULT,
+};
 
+static char lacpbuf[120];	/* LACP peer '[(a,a,a),(p,p,p)]' */
+
 static void
 setlaggport(const char *val, int d, int s, const struct afswtch *afp)
 {
@@ -301,7 +305,31 @@ lagg_status(int s)
 	}
 }
 
+static
+DECL_CMD_FUNC(setlaggtype, arg, d)
+{
+	static const struct lagg_types lt[] = LAGG_TYPES;
+	int i;
+
+	for (i = 0; i < nitems(lt); i++) {
+		if (strcmp(arg, lt[i].lt_name) == 0) {
+			params.lagg_type = lt[i].lt_value;
+			return;
+		}
+	}
+	errx(1, "invalid lagg type: %s", arg);
+}
+
+static void
+lagg_create(int s, struct ifreq *ifr)
+{
+	ifr->ifr_data = (caddr_t) &params;
+	if (ioctl(s, SIOCIFCREATE2, ifr) < 0)
+		err(1, "SIOCIFCREATE2");
+}
+
 static struct cmd lagg_cmds[] = {
+	DEF_CLONE_CMD_ARG("laggtype",   setlaggtype),
 	DEF_CMD_ARG("laggport",		setlaggport),
 	DEF_CMD_ARG("-laggport",	unsetlaggport),
 	DEF_CMD_ARG("laggproto",	setlaggproto),
@@ -335,4 +363,5 @@ lagg_ctor(void)
 	for (i = 0; i < nitems(lagg_cmds);  i++)
 		cmd_register(&lagg_cmds[i]);
 	af_register(&af_lagg);
+	clone_setdefcallback("lagg", lagg_create);
 }

Modified: head/share/man/man4/lagg.4
==============================================================================
--- head/share/man/man4/lagg.4	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/share/man/man4/lagg.4	Thu Oct 22 09:47:12 2020	(r366933)
@@ -16,7 +16,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 18, 2017
+.Dd October 21, 2020
 .Dt LAGG 4
 .Os
 .Sh NAME
@@ -192,6 +192,15 @@ device will be used:
 .Pp
 (Note the mac address of the wireless device is forced to match the wired
 device as a workaround.)
+.Pp
+The following example shows how to create an infiniband failover interface.
+.Bd -literal -offset indent
+# ifconfig ib0 up
+# ifconfig ib1 up
+# ifconfig lagg0 create laggtype infiniband
+# ifconfig lagg0 laggproto failover laggport ib0 laggport ib1 \e
+	1.1.1.1 netmask 255.255.255.0
+.Ed
 .Sh SEE ALSO
 .Xr ng_one2many 4 ,
 .Xr ifconfig 8 ,

Modified: head/sys/net/ieee8023ad_lacp.c
==============================================================================
--- head/sys/net/ieee8023ad_lacp.c	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/sys/net/ieee8023ad_lacp.c	Thu Oct 22 09:47:12 2020	(r366933)
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
 #include <net/if_var.h>
 #include <net/if_dl.h>
 #include <net/ethernet.h>
+#include <net/infiniband.h>
 #include <net/if_media.h>
 #include <net/if_types.h>
 

Modified: head/sys/net/if_ethersubr.c
==============================================================================
--- head/sys/net/if_ethersubr.c	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/sys/net/if_ethersubr.c	Thu Oct 22 09:47:12 2020	(r366933)
@@ -110,7 +110,7 @@ void	(*vlan_input_p)(struct ifnet *, struct mbuf *);
 void	(*bridge_dn_p)(struct mbuf *, struct ifnet *);
 
 /* if_lagg(4) support */
-struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); 
+struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); 
 
 static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] =
 			{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -608,9 +608,9 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m
 
 	/* Handle input from a lagg(4) port */
 	if (ifp->if_type == IFT_IEEE8023ADLAG) {
-		KASSERT(lagg_input_p != NULL,
+		KASSERT(lagg_input_ethernet_p != NULL,
 		    ("%s: if_lagg not loaded!", __func__));
-		m = (*lagg_input_p)(ifp, m);
+		m = (*lagg_input_ethernet_p)(ifp, m);
 		if (m != NULL)
 			ifp = m->m_pkthdr.rcvif;
 		else {

Modified: head/sys/net/if_infiniband.c
==============================================================================
--- head/sys/net/if_infiniband.c	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/sys/net/if_infiniband.c	Thu Oct 22 09:47:12 2020	(r366933)
@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_media.h>
+#include <net/if_lagg.h>
 
 #include <netinet/in.h>
 #include <netinet/if_ether.h>
@@ -58,6 +59,9 @@ __FBSDID("$FreeBSD$");
 
 #include <security/mac/mac_framework.h>
 
+/* if_lagg(4) support */
+struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *); 
+
 #ifdef INET
 static inline void
 infiniband_ipv4_multicast_map(uint32_t addr,
@@ -345,6 +349,16 @@ infiniband_input(struct ifnet *ifp, struct mbuf *m)
 
 	/* Direct packet to correct FIB based on interface config. */
 	M_SETFIB(m, ifp->if_fib);
+
+	/* Handle input from a lagg<N> port */
+	if (ifp->if_type == IFT_INFINIBANDLAG) {
+		KASSERT(lagg_input_infiniband_p != NULL,
+		    ("%s: if_lagg not loaded!", __func__));
+		m = (*lagg_input_infiniband_p)(ifp, m);
+		if (__predict_false(m == NULL))
+			goto done;
+		ifp = m->m_pkthdr.rcvif;
+	}
 
 	/*
 	 * Dispatch frame to upper layer.

Modified: head/sys/net/if_lagg.c
==============================================================================
--- head/sys/net/if_lagg.c	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/sys/net/if_lagg.c	Thu Oct 22 09:47:12 2020	(r366933)
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
 #include <net/bpf.h>
 #include <net/route.h>
 #include <net/vnet.h>
+#include <net/infiniband.h>
 
 #if defined(INET) || defined(INET6)
 #include <netinet/in.h>
@@ -131,7 +132,8 @@ static MALLOC_DEFINE(M_LAGG, laggname, "802.3AD Link A
 static void	lagg_capabilities(struct lagg_softc *);
 static int	lagg_port_create(struct lagg_softc *, struct ifnet *);
 static int	lagg_port_destroy(struct lagg_port *, int);
-static struct mbuf *lagg_input(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_ethernet(struct ifnet *, struct mbuf *);
+static struct mbuf *lagg_input_infiniband(struct ifnet *, struct mbuf *);
 static void	lagg_linkstate(struct lagg_softc *);
 static void	lagg_port_state(struct ifnet *, int);
 static int	lagg_port_ioctl(struct ifnet *, u_long, caddr_t);
@@ -164,7 +166,8 @@ static	int	lagg_setflag(struct lagg_port *, int, int,
 		    int (*func)(struct ifnet *, int));
 static	int	lagg_setflags(struct lagg_port *, int status);
 static uint64_t lagg_get_counter(struct ifnet *ifp, ift_counter cnt);
-static int	lagg_transmit(struct ifnet *, struct mbuf *);
+static int	lagg_transmit_ethernet(struct ifnet *, struct mbuf *);
+static int	lagg_transmit_infiniband(struct ifnet *, struct mbuf *);
 static void	lagg_qflush(struct ifnet *);
 static int	lagg_media_change(struct ifnet *);
 static void	lagg_media_status(struct ifnet *, struct ifmediareq *);
@@ -327,7 +330,8 @@ lagg_modevent(module_t mod, int type, void *data)
 
 	switch (type) {
 	case MOD_LOAD:
-		lagg_input_p = lagg_input;
+		lagg_input_ethernet_p = lagg_input_ethernet;
+		lagg_input_infiniband_p = lagg_input_infiniband;
 		lagg_linkstate_p = lagg_port_state;
 		lagg_detach_cookie = EVENTHANDLER_REGISTER(
 		    ifnet_departure_event, lagg_port_ifdetach, NULL,
@@ -336,7 +340,8 @@ lagg_modevent(module_t mod, int type, void *data)
 	case MOD_UNLOAD:
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    lagg_detach_cookie);
-		lagg_input_p = NULL;
+		lagg_input_ethernet_p = NULL;
+		lagg_input_infiniband_p = NULL;
 		lagg_linkstate_p = NULL;
 		break;
 	default:
@@ -353,6 +358,7 @@ static moduledata_t lagg_mod = {
 
 DECLARE_MODULE(if_lagg, lagg_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
 MODULE_VERSION(if_lagg, 1);
+MODULE_DEPEND(if_lagg, if_infiniband, 1, 1, 1);
 
 static void
 lagg_proto_attach(struct lagg_softc *sc, lagg_proto pr)
@@ -504,18 +510,48 @@ lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_i
 static int
 lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 {
+	struct iflaggparam iflp;
 	struct lagg_softc *sc;
 	struct ifnet *ifp;
-	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
+	int if_type;
+	int error;
+	static const uint8_t eaddr[LAGG_ADDR_LEN];
+	static const uint8_t ib_bcast_addr[INFINIBAND_ADDR_LEN] = {
+		0x00, 0xff, 0xff, 0xff,
+		0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
+		0x00, 0x00, 0x00, 0x00,	0xff, 0xff, 0xff, 0xff
+	};
 
+	if (params != NULL) {
+		error = copyin(params, &iflp, sizeof(iflp));
+		if (error)
+			return (error);
+
+		switch (iflp.lagg_type) {
+		case LAGG_TYPE_ETHERNET:
+			if_type = IFT_ETHER;
+			break;
+		case LAGG_TYPE_INFINIBAND:
+			if_type = IFT_INFINIBAND;
+			break;
+		default:
+			return (EINVAL);
+		}
+	} else {
+		if_type = IFT_ETHER;
+	}
+
 	sc = malloc(sizeof(*sc), M_LAGG, M_WAITOK|M_ZERO);
-	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
+	ifp = sc->sc_ifp = if_alloc(if_type);
 	if (ifp == NULL) {
 		free(sc, M_LAGG);
 		return (ENOSPC);
 	}
 	LAGG_SX_INIT(sc);
 
+	mtx_init(&sc->sc_mtx, "lagg-mtx", NULL, MTX_DEF);
+	callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0);
+
 	LAGG_XLOCK(sc);
 	if (V_def_use_flowid)
 		sc->sc_opts |= LAGG_OPT_USE_FLOWID;
@@ -530,15 +566,25 @@ lagg_clone_create(struct if_clone *ifc, int unit, cadd
 
 	CK_SLIST_INIT(&sc->sc_ports);
 
-	/* Initialise pseudo media types */
-	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
-	    lagg_media_status);
-	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
-	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
+	switch (if_type) {
+	case IFT_ETHER:
+		/* Initialise pseudo media types */
+		ifmedia_init(&sc->sc_media, 0, lagg_media_change,
+		    lagg_media_status);
+		ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+		ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
 
-	if_initname(ifp, laggname, unit);
+		if_initname(ifp, laggname, unit);
+		ifp->if_transmit = lagg_transmit_ethernet;
+		break;
+	case IFT_INFINIBAND:
+		if_initname(ifp, laggname, unit);
+		ifp->if_transmit = lagg_transmit_infiniband;
+		break;
+	default:
+		break;
+	}
 	ifp->if_softc = sc;
-	ifp->if_transmit = lagg_transmit;
 	ifp->if_qflush = lagg_qflush;
 	ifp->if_init = lagg_init;
 	ifp->if_ioctl = lagg_ioctl;
@@ -555,9 +601,18 @@ lagg_clone_create(struct if_clone *ifc, int unit, cadd
 
 	/*
 	 * Attach as an ordinary ethernet device, children will be attached
-	 * as special device IFT_IEEE8023ADLAG.
+	 * as special device IFT_IEEE8023ADLAG or IFT_INFINIBANDLAG.
 	 */
-	ether_ifattach(ifp, eaddr);
+	switch (if_type) {
+	case IFT_ETHER:
+		ether_ifattach(ifp, eaddr);
+		break;
+	case IFT_INFINIBAND:
+		infiniband_ifattach(ifp, eaddr, ib_bcast_addr);
+		break;
+	default:
+		break;
+	}
 
 	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
 		lagg_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
@@ -595,14 +650,24 @@ lagg_clone_destroy(struct ifnet *ifp)
 	lagg_proto_detach(sc);
 	LAGG_XUNLOCK(sc);
 
-	ifmedia_removeall(&sc->sc_media);
-	ether_ifdetach(ifp);
+	switch (ifp->if_type) {
+	case IFT_ETHER:
+		ifmedia_removeall(&sc->sc_media);
+		ether_ifdetach(ifp);
+		break;
+	case IFT_INFINIBAND:
+		infiniband_ifdetach(ifp);
+		break;
+	default:
+		break;
+	}
 	if_free(ifp);
 
 	LAGG_LIST_LOCK();
 	SLIST_REMOVE(&V_lagg_list, sc, lagg_softc, sc_entries);
 	LAGG_LIST_UNLOCK();
 
+	mtx_destroy(&sc->sc_mtx);
 	LAGG_SX_DESTROY(sc);
 	free(sc, M_LAGG);
 }
@@ -669,6 +734,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *
 	struct lagg_port *lp, *tlp;
 	struct ifreq ifr;
 	int error, i, oldmtu;
+	int if_type;
 	uint64_t *pval;
 
 	LAGG_XLOCK_ASSERT(sc);
@@ -695,9 +761,22 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *
 		return (EBUSY);
 	}
 
-	/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
-	if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
-		return (EPROTONOSUPPORT);
+	switch (sc->sc_ifp->if_type) {
+	case IFT_ETHER:
+		/* XXX Disallow non-ethernet interfaces (this should be any of 802) */
+		if (ifp->if_type != IFT_ETHER && ifp->if_type != IFT_L2VLAN)
+			return (EPROTONOSUPPORT);
+		if_type = IFT_IEEE8023ADLAG;
+		break;
+	case IFT_INFINIBAND:
+		/* XXX Disallow non-infiniband interfaces */
+		if (ifp->if_type != IFT_INFINIBAND)
+			return (EPROTONOSUPPORT);
+		if_type = IFT_INFINIBANDLAG;
+		break;
+	default:
+		break;
+	}
 
 	/* Allow the first Ethernet member to define the MTU */
 	oldmtu = -1;
@@ -754,14 +833,14 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *
 	if_ref(ifp);
 	lp->lp_ifp = ifp;
 
-	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ETHER_ADDR_LEN);
+	bcopy(IF_LLADDR(ifp), lp->lp_lladdr, ifp->if_addrlen);
 	lp->lp_ifcapenable = ifp->if_capenable;
 	if (CK_SLIST_EMPTY(&sc->sc_ports)) {
-		bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+		bcopy(IF_LLADDR(ifp), IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
 		lagg_proto_lladdr(sc);
 		EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 	} else {
-		if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+		if_setlladdr(ifp, IF_LLADDR(sc->sc_ifp), ifp->if_addrlen);
 	}
 	lagg_setflags(lp, 1);
 
@@ -770,7 +849,7 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *
 
 	/* Change the interface type */
 	lp->lp_iftype = ifp->if_type;
-	ifp->if_type = IFT_IEEE8023ADLAG;
+	ifp->if_type = if_type;
 	ifp->if_lagg = lp;
 	lp->lp_ioctl = ifp->if_ioctl;
 	ifp->if_ioctl = lagg_port_ioctl;
@@ -887,15 +966,15 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport
 
 	/* Update the primary interface */
 	if (lp == sc->sc_primary) {
-		uint8_t lladdr[ETHER_ADDR_LEN];
+		uint8_t lladdr[LAGG_ADDR_LEN];
 
 		if ((lp0 = CK_SLIST_FIRST(&sc->sc_ports)) == NULL)
-			bzero(&lladdr, ETHER_ADDR_LEN);
+			bzero(&lladdr, LAGG_ADDR_LEN);
 		else
-			bcopy(lp0->lp_lladdr, lladdr, ETHER_ADDR_LEN);
+			bcopy(lp0->lp_lladdr, lladdr, LAGG_ADDR_LEN);
 		sc->sc_primary = lp0;
 		if (sc->sc_destroying == 0) {
-			bcopy(lladdr, IF_LLADDR(sc->sc_ifp), ETHER_ADDR_LEN);
+			bcopy(lladdr, IF_LLADDR(sc->sc_ifp), sc->sc_ifp->if_addrlen);
 			lagg_proto_lladdr(sc);
 			EVENTHANDLER_INVOKE(iflladdr_event, sc->sc_ifp);
 		}
@@ -905,7 +984,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport
 		 * as well, to switch from old lladdr to its 'real' one)
 		 */
 		CK_SLIST_FOREACH(lp_ptr, &sc->sc_ports, lp_entries)
-			if_setlladdr(lp_ptr->lp_ifp, lladdr, ETHER_ADDR_LEN);
+			if_setlladdr(lp_ptr->lp_ifp, lladdr, lp_ptr->lp_ifp->if_addrlen);
 	}
 
 	if (lp->lp_ifflags)
@@ -914,7 +993,7 @@ lagg_port_destroy(struct lagg_port *lp, int rundelport
 	if (lp->lp_detaching == 0) {
 		lagg_setflags(lp, 0);
 		lagg_setcaps(lp, lp->lp_ifcapenable);
-		if_setlladdr(ifp, lp->lp_lladdr, ETHER_ADDR_LEN);
+		if_setlladdr(ifp, lp->lp_lladdr, ifp->if_addrlen);
 	}
 
 	/*
@@ -938,9 +1017,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t
 	int error = 0;
 
 	/* Should be checked by the caller */
-	if (ifp->if_type != IFT_IEEE8023ADLAG ||
-	    (lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+	switch (ifp->if_type) {
+	case IFT_IEEE8023ADLAG:
+	case IFT_INFINIBANDLAG:
+		if ((lp = ifp->if_lagg) == NULL || (sc = lp->lp_softc) == NULL)
+			goto fallback;
+		break;
+	default:
 		goto fallback;
+	}
 
 	switch (cmd) {
 	case SIOCGLAGGPORT:
@@ -1130,6 +1215,44 @@ lagg_port2req(struct lagg_port *lp, struct lagg_reqpor
 }
 
 static void
+lagg_watchdog_infiniband(void *arg)
+{
+	struct lagg_softc *sc;
+	struct lagg_port *lp;
+	struct ifnet *ifp;
+	struct ifnet *lp_ifp;
+
+	sc = arg;
+
+	/*
+	 * Because infiniband nodes have a fixed MAC address, which is
+	 * generated by the so-called GID, we need to regularly update
+	 * the link level address of the parent lagg<N> device when
+	 * the active port changes. Possibly we could piggy-back on
+	 * link up/down events aswell, but using a timer also provides
+	 * a guarantee against too frequent events. This operation
+	 * does not have to be atomic.
+	 */
+	LAGG_RLOCK();
+	lp = lagg_link_active(sc, sc->sc_primary);
+	if (lp != NULL) {
+		ifp = sc->sc_ifp;
+		lp_ifp = lp->lp_ifp;
+
+		if (ifp != NULL && lp_ifp != NULL &&
+		    memcmp(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen) != 0) {
+			memcpy(IF_LLADDR(ifp), IF_LLADDR(lp_ifp), ifp->if_addrlen);
+			CURVNET_SET(ifp->if_vnet);
+			EVENTHANDLER_INVOKE(iflladdr_event, ifp);
+			CURVNET_RESTORE();
+		}
+	}
+	LAGG_RUNLOCK();
+
+	callout_reset(&sc->sc_watchdog, hz, &lagg_watchdog_infiniband, arg);
+}
+
+static void
 lagg_init(void *xsc)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)xsc;
@@ -1151,12 +1274,18 @@ lagg_init(void *xsc)
 	 */
 	CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (memcmp(IF_LLADDR(ifp), IF_LLADDR(lp->lp_ifp),
-		    ETHER_ADDR_LEN) != 0)
-			if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ETHER_ADDR_LEN);
+		    ifp->if_addrlen) != 0)
+			if_setlladdr(lp->lp_ifp, IF_LLADDR(ifp), ifp->if_addrlen);
 	}
 
 	lagg_proto_init(sc);
 
+	if (ifp->if_type == IFT_INFINIBAND) {
+		mtx_lock(&sc->sc_mtx);
+		lagg_watchdog_infiniband(sc);
+		mtx_unlock(&sc->sc_mtx);
+	}
+
 	LAGG_XUNLOCK(sc);
 }
 
@@ -1173,6 +1302,12 @@ lagg_stop(struct lagg_softc *sc)
 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
 
 	lagg_proto_stop(sc);
+
+	mtx_lock(&sc->sc_mtx);
+	callout_stop(&sc->sc_watchdog);
+	mtx_unlock(&sc->sc_mtx);
+
+	callout_drain(&sc->sc_watchdog);
 }
 
 static int
@@ -1228,7 +1363,12 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data
 			error = EPROTONOSUPPORT;
 			break;
 		}
-
+		/* Infiniband only supports the failover protocol. */
+		if (ra->ra_proto != LAGG_PROTO_FAILOVER &&
+		    ifp->if_type == IFT_INFINIBAND) {
+			error = EPROTONOSUPPORT;
+			break;
+		}
 		LAGG_XLOCK(sc);
 		lagg_proto_detach(sc);
 		LAGG_UNLOCK_ASSERT();
@@ -1546,7 +1686,10 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data
 		break;
 	case SIOCSIFMEDIA:
 	case SIOCGIFMEDIA:
-		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
+		if (ifp->if_type == IFT_INFINIBAND)
+			error = EINVAL;
+		else
+			error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
 		break;
 
 	case SIOCSIFCAP:
@@ -1855,7 +1998,7 @@ lagg_setflags(struct lagg_port *lp, int status)
 }
 
 static int
-lagg_transmit(struct ifnet *ifp, struct mbuf *m)
+lagg_transmit_ethernet(struct ifnet *ifp, struct mbuf *m)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	int error;
@@ -1880,6 +2023,32 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
 	return (error);
 }
 
+static int
+lagg_transmit_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
+	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
+	int error;
+
+#if defined(KERN_TLS) || defined(RATELIMIT)
+	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG)
+		MPASS(m->m_pkthdr.snd_tag->ifp == ifp);
+#endif
+	LAGG_RLOCK();
+	/* We need a Tx algorithm and at least one port */
+	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
+		LAGG_RUNLOCK();
+		m_freem(m);
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ENXIO);
+	}
+
+	INFINIBAND_BPF_MTAP(ifp, m);
+
+	error = lagg_proto_start(sc, m);
+	LAGG_RUNLOCK();
+	return (error);
+}
+
 /*
  * The ifp->if_qflush entry point for lagg(4) is no-op.
  */
@@ -1889,7 +2058,7 @@ lagg_qflush(struct ifnet *ifp __unused)
 }
 
 static struct mbuf *
-lagg_input(struct ifnet *ifp, struct mbuf *m)
+lagg_input_ethernet(struct ifnet *ifp, struct mbuf *m)
 {
 	struct lagg_port *lp = ifp->if_lagg;
 	struct lagg_softc *sc = lp->lp_softc;
@@ -1905,6 +2074,34 @@ lagg_input(struct ifnet *ifp, struct mbuf *m)
 	}
 
 	ETHER_BPF_MTAP(scifp, m);
+
+	m = lagg_proto_input(sc, lp, m);
+	if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {
+		m_freem(m);
+		m = NULL;
+	}
+
+	LAGG_RUNLOCK();
+	return (m);
+}
+
+static struct mbuf *
+lagg_input_infiniband(struct ifnet *ifp, struct mbuf *m)
+{
+	struct lagg_port *lp = ifp->if_lagg;
+	struct lagg_softc *sc = lp->lp_softc;
+	struct ifnet *scifp = sc->sc_ifp;
+
+	LAGG_RLOCK();
+	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+	    lp->lp_detaching != 0 ||
+	    sc->sc_proto == LAGG_PROTO_NONE) {
+		LAGG_RUNLOCK();
+		m_freem(m);
+		return (NULL);
+	}
+
+	INFINIBAND_BPF_MTAP(scifp, m);
 
 	m = lagg_proto_input(sc, lp, m);
 	if (m != NULL && (scifp->if_flags & IFF_MONITOR) != 0) {

Modified: head/sys/net/if_lagg.h
==============================================================================
--- head/sys/net/if_lagg.h	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/sys/net/if_lagg.h	Thu Oct 22 09:47:12 2020	(r366933)
@@ -72,7 +72,34 @@ struct lagg_protos {
 	{ "default",		LAGG_PROTO_DEFAULT }			\
 }
 
+/* Supported lagg TYPEs */
+typedef enum {
+	LAGG_TYPE_ETHERNET = 0, /* ethernet (default) */
+	LAGG_TYPE_INFINIBAND,	/* infiniband */
+	LAGG_TYPE_MAX,
+} lagg_type;
+
+struct lagg_types {
+	const char		*lt_name;
+	lagg_type		lt_value;
+};
+
+#define	LAGG_TYPE_DEFAULT	LAGG_TYPE_ETHERNET
+#define LAGG_TYPES	{						\
+	{ "ethernet",		LAGG_TYPE_ETHERNET },			\
+	{ "infiniband",		LAGG_TYPE_INFINIBAND },			\
+}
+
 /*
+ * lagg create clone params
+ */
+struct iflaggparam {
+	uint8_t lagg_type;	/* see LAGG_TYPE_XXX */
+	uint8_t reserved_8[3];
+	uint32_t reserved_32[3];
+};
+
+/*
  * lagg ioctls.
  */
 
@@ -206,7 +233,7 @@ struct lagg_counters {
 
 struct lagg_softc {
 	struct ifnet			*sc_ifp;	/* virtual interface */
-	struct rmlock			sc_mtx;
+	struct mtx			sc_mtx;		/* watchdog mutex */
 	struct sx			sc_sx;
 	int				sc_proto;	/* lagg protocol */
 	u_int				sc_count;	/* number of ports */
@@ -230,12 +257,15 @@ struct lagg_softc {
 	u_int				sc_opts;
 	int				flowid_shift;	/* shift the flowid */
 	struct lagg_counters		detached_counters; /* detached ports sum */
+	struct callout			sc_watchdog;	/* watchdog timer */
 };
 
 struct lagg_port {
 	struct ifnet			*lp_ifp;	/* physical interface */
 	struct lagg_softc		*lp_softc;	/* parent lagg */
-	uint8_t				lp_lladdr[ETHER_ADDR_LEN];
+#define	LAGG_ADDR_LEN \
+	MAX(INFINIBAND_ADDR_LEN, ETHER_ADDR_LEN)
+	uint8_t				lp_lladdr[LAGG_ADDR_LEN];
 
 	u_char				lp_iftype;	/* interface type */
 	uint32_t			lp_prio;	/* port priority */
@@ -257,7 +287,8 @@ struct lagg_port {
 	struct epoch_context	lp_epoch_ctx;
 };
 
-extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *);
+extern struct mbuf *(*lagg_input_infiniband_p)(struct ifnet *, struct mbuf *);
 extern void	(*lagg_linkstate_p)(struct ifnet *, int );
 
 int		lagg_enqueue(struct ifnet *, struct mbuf *);

Modified: head/sys/net/if_types.h
==============================================================================
--- head/sys/net/if_types.h	Thu Oct 22 09:28:11 2020	(r366932)
+++ head/sys/net/if_types.h	Thu Oct 22 09:47:12 2020	(r366933)
@@ -242,6 +242,7 @@ typedef enum {
 	IFT_OPTICALCHANNEL = 0xc3,	/* Optical Channel */
 	IFT_OPTICALTRANSPORT = 0xc4,	/* Optical Transport */
 	IFT_INFINIBAND	= 0xc7,		/* Infiniband */
+	IFT_INFINIBANDLAG = 0xc8,	/* Infiniband Link Aggregate */
 	IFT_BRIDGE	= 0xd1,		/* Transparent bridge interface */
 	IFT_STF		= 0xd7,		/* 6to4 interface */
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202010220947.09M9lCYA051492>