Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 5 Mar 2018 14:24:30 +0000 (UTC)
From:      Hans Petter Selasky <hselasky@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r330507 - in head/sys/ofed: drivers/infiniband/core include/rdma
Message-ID:  <201803051424.w25EOUPC099171@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: hselasky
Date: Mon Mar  5 14:24:30 2018
New Revision: 330507
URL: https://svnweb.freebsd.org/changeset/base/330507

Log:
  Get correct network device when accepting incoming RDMA connections in ibcore.
  
  This patch ensures that the GID index, rather than the GID value itself,
  is always used as the basis for resolving incoming RDMA connections.
  
  Background:
  On a per-InfiniBand-port basis, the GID value is not a unique identifier!
  The assumption that it is unique falls apart when the VLAN ID, IPv6 scope
  ID and RoCE type, as supported by RoCE v2, are taken into account. This
  additional information is stored in the so-called GID attributes and is
  needed to correctly identify the destination network interface for an
  incoming connection.
  
  Different VLANs are allowed to define the same IPv4 addresses. The same
  holds, in particular, for the default IPv6 link-local addresses and when
  using so-called containers or jails.
  
  The VNET information for the destination network interface is needed in
  order to perform the L2 address lookup in the right Virtual Network Stack
  context.
  
  Consequently, old functions previously used by RoCE v1, such as
  rdma_addr_find_smac_by_sgid(), are impossible to support, because
  there can be multiple identical GIDs associated with the same
  InfiniBand port, and the answer to such a request becomes undefined.
  This function has been removed.
  
  MFC after:	1 week
  Sponsored by:	Mellanox Technologies

Modified:
  head/sys/ofed/drivers/infiniband/core/ib_addr.c
  head/sys/ofed/drivers/infiniband/core/ib_cma.c
  head/sys/ofed/drivers/infiniband/core/ib_verbs.c
  head/sys/ofed/include/rdma/ib_addr.h

Modified: head/sys/ofed/drivers/infiniband/core/ib_addr.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/ib_addr.c	Mon Mar  5 14:22:36 2018	(r330506)
+++ head/sys/ofed/drivers/infiniband/core/ib_addr.c	Mon Mar  5 14:24:30 2018	(r330507)
@@ -151,8 +151,7 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, str
 EXPORT_SYMBOL(rdma_copy_addr);
 
 int rdma_translate_ip(const struct sockaddr *addr,
-		      struct rdma_dev_addr *dev_addr,
-		      u16 *vlan_id)
+		      struct rdma_dev_addr *dev_addr)
 {
 	struct net_device *dev = NULL;
 	int ret = -EADDRNOTAVAIL;
@@ -185,8 +184,6 @@ int rdma_translate_ip(const struct sockaddr *addr,
 
 	if (dev != NULL) {
 		ret = rdma_copy_addr(dev_addr, dev, NULL);
-		if (vlan_id)
-			*vlan_id = rdma_vlan_dev_vlan_id(dev);
 		dev_put(dev);
 	}
 	return ret;
@@ -305,6 +302,8 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 		if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
 			ifp = rte->rt_ifp;
 			dev_hold(ifp);
+		} else if (addr->bound_dev_if != 0) {
+			ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
 		} else {
 			ifp = ip_dev_find(addr->net, src_in->sin_addr.s_addr);
 		}
@@ -460,6 +459,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 		if (rte->rt_ifp->if_flags & IFF_LOOPBACK) {
 			ifp = rte->rt_ifp;
 			dev_hold(ifp);
+		} else if (addr->bound_dev_if != 0) {
+			ifp = dev_get_by_index(addr->net, addr->bound_dev_if);
 		} else {
 			ifp = ip6_dev_find(addr->net, src_in->sin6_addr);
 		}
@@ -551,7 +552,7 @@ static int addr_resolve_neigh(struct ifnet *dev,
 	if (dev->if_flags & IFF_LOOPBACK) {
 		int ret;
 
-		ret = rdma_translate_ip(dst_in, addr, NULL);
+		ret = rdma_translate_ip(dst_in, addr);
 		if (!ret)
 			memcpy(addr->dst_dev_addr, addr->src_dev_addr,
 			       MAX_ADDR_LEN);
@@ -757,13 +758,12 @@ static void resolve_cb(int status, struct sockaddr *sr
 
 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 				 const union ib_gid *dgid,
-				 u8 *dmac, u16 *vlan_id, int *if_index,
+				 u8 *dmac, struct net_device *dev,
 				 int *hoplimit)
 {
 	int ret = 0;
 	struct rdma_dev_addr dev_addr;
 	struct resolve_cb_context ctx;
-	struct net_device *dev;
 
 	union {
 		struct sockaddr     _sockaddr;
@@ -771,15 +771,14 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *s
 		struct sockaddr_in6 _sockaddr_in6;
 	} sgid_addr, dgid_addr;
 
-
 	rdma_gid2ip(&sgid_addr._sockaddr, sgid);
 	rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
 	memset(&dev_addr, 0, sizeof(dev_addr));
-	if (if_index)
-		dev_addr.bound_dev_if = *if_index;
-	dev_addr.net = TD_TO_VNET(curthread);
 
+	dev_addr.bound_dev_if = dev->if_index;
+	dev_addr.net = dev_net(dev);
+
 	ctx.addr = &dev_addr;
 	init_completion(&ctx.comp);
 	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
@@ -794,42 +793,11 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *s
 		return ret;
 
 	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
-	dev = dev_get_by_index(dev_addr.net, dev_addr.bound_dev_if);
-	if (!dev)
-		return -ENODEV;
-	if (if_index)
-		*if_index = dev_addr.bound_dev_if;
-	if (vlan_id)
-		*vlan_id = rdma_vlan_dev_vlan_id(dev);
 	if (hoplimit)
 		*hoplimit = dev_addr.hoplimit;
-	dev_put(dev);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
-
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
-{
-	int ret = 0;
-	struct rdma_dev_addr dev_addr;
-	union {
-		struct sockaddr     _sockaddr;
-		struct sockaddr_in  _sockaddr_in;
-		struct sockaddr_in6 _sockaddr_in6;
-	} gid_addr;
-
-	rdma_gid2ip(&gid_addr._sockaddr, sgid);
-
-	memset(&dev_addr, 0, sizeof(dev_addr));
-	dev_addr.net = TD_TO_VNET(curthread);
-	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
-	if (ret)
-		return ret;
-
-	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
-	return ret;
-}
-EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
 
 int addr_init(void)
 {

Modified: head/sys/ofed/drivers/infiniband/core/ib_cma.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/ib_cma.c	Mon Mar  5 14:22:36 2018	(r330506)
+++ head/sys/ofed/drivers/infiniband/core/ib_cma.c	Mon Mar  5 14:24:30 2018	(r330507)
@@ -533,7 +533,7 @@ static int cma_translate_addr(struct sockaddr *addr, s
 	int ret;
 
 	if (addr->sa_family != AF_IB) {
-		ret = rdma_translate_ip(addr, dev_addr, NULL);
+		ret = rdma_translate_ip(addr, dev_addr);
 	} else {
 		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
 		ret = 0;
@@ -2094,7 +2094,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
 	conn_id->state = RDMA_CM_CONNECT;
 
-	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
+	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
 	if (ret) {
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);

Modified: head/sys/ofed/drivers/infiniband/core/ib_verbs.c
==============================================================================
--- head/sys/ofed/drivers/infiniband/core/ib_verbs.c	Mon Mar  5 14:22:36 2018	(r330506)
+++ head/sys/ofed/drivers/infiniband/core/ib_verbs.c	Mon Mar  5 14:24:30 2018	(r330507)
@@ -483,56 +483,29 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 po
 		return ret;
 
 	if (rdma_protocol_roce(device, port_num)) {
-		int if_index;
-		u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
+		struct ib_gid_attr dgid_attr;
+		const u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
 				wc->vlan_id : 0xffff;
-		struct net_device *idev;
-		struct net_device *resolved_dev;
 
 		if (!(wc->wc_flags & IB_WC_GRH))
 			return -EPROTOTYPE;
 
-		if (!device->get_netdev)
-			return -EOPNOTSUPP;
-
-		idev = device->get_netdev(device, port_num);
-		if (!idev)
-			return -ENODEV;
-
-		/*
-		 * Get network interface index early on. This is
-		 * useful for IPv6 link local addresses:
-		 */
-		if_index = idev->if_index;
-
-		ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
-						   ah_attr->dmac,
-						   wc->wc_flags & IB_WC_WITH_VLAN ?
-						   NULL : &vlan_id,
-						   &if_index, &hoplimit);
-		if (ret) {
-			dev_put(idev);
+		ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+					      &dgid, gid_type, &gid_index);
+		if (ret)
 			return ret;
-		}
 
-		resolved_dev = dev_get_by_index(&init_net, if_index);
-		if (resolved_dev->if_flags & IFF_LOOPBACK) {
-			dev_put(resolved_dev);
-			resolved_dev = idev;
-			dev_hold(resolved_dev);
-		}
-		rcu_read_lock();
-		if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
-								   resolved_dev))
-			ret = -EHOSTUNREACH;
-		rcu_read_unlock();
-		dev_put(idev);
-		dev_put(resolved_dev);
+		ret = ib_get_cached_gid(device, port_num, gid_index, &dgid, &dgid_attr);
 		if (ret)
 			return ret;
 
-		ret = get_sgid_index_from_eth(device, port_num, vlan_id,
-					      &dgid, gid_type, &gid_index);
+		if (dgid_attr.ndev == NULL)
+			return -ENODEV;
+
+		ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid, ah_attr->dmac,
+		    dgid_attr.ndev, &hoplimit);
+
+		dev_put(dgid_attr.ndev);
 		if (ret)
 			return ret;
 	}
@@ -1207,7 +1180,6 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
 		} else {
 			union ib_gid		sgid;
 			struct ib_gid_attr	sgid_attr;
-			int			ifindex;
 			int			hop_limit;
 
 			ret = ib_query_gid(qp->device,
@@ -1221,12 +1193,10 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
 				goto out;
 			}
 
-			ifindex = sgid_attr.ndev->if_index;
-
 			ret = rdma_addr_find_l2_eth_by_grh(&sgid,
 							   &qp_attr->ah_attr.grh.dgid,
 							   qp_attr->ah_attr.dmac,
-							   NULL, &ifindex, &hop_limit);
+							   sgid_attr.ndev, &hop_limit);
 
 			dev_put(sgid_attr.ndev);
 

Modified: head/sys/ofed/include/rdma/ib_addr.h
==============================================================================
--- head/sys/ofed/include/rdma/ib_addr.h	Mon Mar  5 14:22:36 2018	(r330506)
+++ head/sys/ofed/include/rdma/ib_addr.h	Mon Mar  5 14:24:30 2018	(r330507)
@@ -95,10 +95,10 @@ struct rdma_dev_addr {
  * rdma_translate_ip - Translate a local IP address to an RDMA hardware
  *   address.
  *
- * The dev_addr->net field must be initialized.
+ * The dev_addr->net and dev_addr->bound_dev_if fields must be initialized.
  */
 int rdma_translate_ip(const struct sockaddr *addr,
-		      struct rdma_dev_addr *dev_addr, u16 *vlan_id);
+		      struct rdma_dev_addr *dev_addr);
 
 /**
  * rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -134,10 +134,9 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, str
 
 int rdma_addr_size(struct sockaddr *addr);
 
-int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
 int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
 				 const union ib_gid *dgid,
-				 u8 *smac, u16 *vlan_id, int *if_index,
+				 u8 *smac, struct net_device *dev,
 				 int *hoplimit);
 
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803051424.w25EOUPC099171>