Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 21 Mar 2018 18:57:31 +0000 (UTC)
From:      Navdeep Parhar <np@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r331318 - projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe
Message-ID:  <201803211857.w2LIvVHM019839@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: np
Date: Wed Mar 21 18:57:31 2018
New Revision: 331318
URL: https://svnweb.freebsd.org/changeset/base/331318

Log:
  MFC r326169 (cxgbe portion).
  
  Sponsored by:	Chelsio Communications

Modified:
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cq.c
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/device.c
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/mem.c
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/provider.c
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/qp.c
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/t4.h
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/user.h
Directory Properties:
  projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/   (props changed)

Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c
==============================================================================
--- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c	Wed Mar 21 18:39:29 2018	(r331317)
+++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c	Wed Mar 21 18:57:31 2018	(r331318)
@@ -46,8 +46,11 @@ __FBSDID("$FreeBSD$");
 
 #include <netinet/in_systm.h>
 #include <netinet/in_pcb.h>
+#include <netinet6/in6_pcb.h>
 #include <netinet/ip.h>
 #include <netinet/in_fib.h>
+#include <netinet6/in6_fib.h>
+#include <netinet6/scope6_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp.h>
@@ -78,6 +81,8 @@ static struct work_struct c4iw_task;
 static struct workqueue_struct *c4iw_taskq;
 static LIST_HEAD(err_cqe_list);
 static spinlock_t err_cqe_lock;
+static LIST_HEAD(listen_port_list);
+static DEFINE_MUTEX(listen_port_mutex);
 
 static void process_req(struct work_struct *ctx);
 static void start_ep_timer(struct c4iw_ep *ep);
@@ -85,12 +90,7 @@ static int stop_ep_timer(struct c4iw_ep *ep);
 static int set_tcpinfo(struct c4iw_ep *ep);
 static void process_timeout(struct c4iw_ep *ep);
 static void process_err_cqes(void);
-static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
-static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
-static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
 static void *alloc_ep(int size, gfp_t flags);
-static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
-		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
 static void close_socket(struct socket *so);
 static int send_mpa_req(struct c4iw_ep *ep);
 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
@@ -120,6 +120,15 @@ static int process_terminate(struct c4iw_ep *ep);
 static int terminate(struct sge_iq *iq, const struct rss_header *rss,
     struct mbuf *m);
 static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events);
+static struct listen_port_info *
+add_ep_to_listenlist(struct c4iw_listen_ep *lep);
+static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep);
+static struct c4iw_listen_ep *
+find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so);
+static int get_ifnet_from_raddr(struct sockaddr_storage *raddr,
+		struct ifnet **ifp);
+static void process_newconn(struct c4iw_listen_ep *master_lep,
+		struct socket *new_so);
 #define START_EP_TIMER(ep) \
     do { \
 	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
@@ -134,6 +143,34 @@ static int add_ep_to_req_list(struct c4iw_ep *ep, int 
 	    stop_ep_timer(ep); \
     })
 
+#define GET_LOCAL_ADDR(pladdr, so) \
+	do { \
+		struct sockaddr_storage *__a = NULL; \
+		struct  inpcb *__inp = sotoinpcb(so); \
+		KASSERT(__inp != NULL, \
+		   ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
+		if (__inp->inp_vflag & INP_IPV4) \
+			in_getsockaddr(so, (struct sockaddr **)&__a); \
+		else \
+			in6_getsockaddr(so, (struct sockaddr **)&__a); \
+		*(pladdr) = *__a; \
+		free(__a, M_SONAME); \
+	} while (0)
+
+#define GET_REMOTE_ADDR(praddr, so) \
+	do { \
+		struct sockaddr_storage *__a = NULL; \
+		struct  inpcb *__inp = sotoinpcb(so); \
+		KASSERT(__inp != NULL, \
+		   ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \
+		if (__inp->inp_vflag & INP_IPV4) \
+			in_getpeeraddr(so, (struct sockaddr **)&__a); \
+		else \
+			in6_getpeeraddr(so, (struct sockaddr **)&__a); \
+		*(praddr) = *__a; \
+		free(__a, M_SONAME); \
+	} while (0)
+
 #ifdef KTR
 static char *states[] = {
 	"idle",
@@ -152,7 +189,6 @@ static char *states[] = {
 };
 #endif
 
-
 static void deref_cm_id(struct c4iw_ep_common *epc)
 {
       epc->cm_id->rem_ref(epc->cm_id);
@@ -179,13 +215,184 @@ static void ref_qp(struct c4iw_ep *ep)
 	set_bit(QP_REFED, &ep->com.history);
 	c4iw_qp_add_ref(&ep->com.qp->ibqp);
 }
+/* allocated per TCP port while listening */
+struct listen_port_info {
+	uint16_t port_num; /* TCP port address */
+	struct list_head list; /* belongs to listen_port_list */
+	struct list_head lep_list; /* per port lep list */
+	uint32_t refcnt; /* number of lep's listening */
+};
 
+/*
+ * Following two lists are used to manage INADDR_ANY listeners:
+ * 1)listen_port_list
+ * 2)lep_list
+ *
+ * Below is the INADDR_ANY listener lists overview on a system with a two port
+ * adapter:
+ *   |------------------|
+ *   |listen_port_list  |
+ *   |------------------|
+ *            |
+ *            |              |-----------|       |-----------|  
+ *            |              | port_num:X|       | port_num:X|  
+ *            |--------------|-list------|-------|-list------|-------....
+ *                           | lep_list----|     | lep_list----|
+ *                           | refcnt    | |     | refcnt    | |
+ *                           |           | |     |           | |
+ *                           |           | |     |           | |
+ *                           |-----------| |     |-----------| |
+ *                                         |                   |
+ *                                         |                   |
+ *                                         |                   |
+ *                                         |                   |         lep1                  lep2         
+ *                                         |                   |    |----------------|    |----------------|
+ *                                         |                   |----| listen_ep_list |----| listen_ep_list |
+ *                                         |                        |----------------|    |----------------|
+ *                                         |
+ *                                         |
+ *                                         |        lep1                  lep2         
+ *                                         |   |----------------|    |----------------|
+ *                                         |---| listen_ep_list |----| listen_ep_list |
+ *                                             |----------------|    |----------------|
+ *
+ * Because the adapter has two ports, there are two lep's (lep1 & lep2) for
+ * each TCP port number.
+ *
+ * Here 'lep1' is always marked as the master lep, because solisten() is
+ * always called through the first lep.
+ *
+ */
+static struct listen_port_info *
+add_ep_to_listenlist(struct c4iw_listen_ep *lep)
+{
+	uint16_t port;
+	struct listen_port_info *port_info = NULL;
+	struct sockaddr_storage *laddr = &lep->com.local_addr;
+
+	port = (laddr->ss_family == AF_INET) ?
+		((struct sockaddr_in *)laddr)->sin_port :
+		((struct sockaddr_in6 *)laddr)->sin6_port;
+
+	mutex_lock(&listen_port_mutex);
+
+	list_for_each_entry(port_info, &listen_port_list, list)
+		if (port_info->port_num == port)
+			goto found_port;
+
+	port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK);
+	port_info->port_num = port;
+	port_info->refcnt    = 0;
+
+	list_add_tail(&port_info->list, &listen_port_list);
+	INIT_LIST_HEAD(&port_info->lep_list);
+
+found_port:
+	port_info->refcnt++;
+	list_add_tail(&lep->listen_ep_list, &port_info->lep_list);
+	mutex_unlock(&listen_port_mutex);
+	return port_info;
+}
+
+static int
+rem_ep_from_listenlist(struct c4iw_listen_ep *lep)
+{
+	uint16_t port;
+	struct listen_port_info *port_info = NULL;
+	struct sockaddr_storage *laddr = &lep->com.local_addr;
+	int refcnt = 0;
+
+	port = (laddr->ss_family == AF_INET) ?
+		((struct sockaddr_in *)laddr)->sin_port :
+		((struct sockaddr_in6 *)laddr)->sin6_port;
+
+	mutex_lock(&listen_port_mutex);
+
+	/* get the port_info structure based on the lep's port address */
+	list_for_each_entry(port_info, &listen_port_list, list) {
+		if (port_info->port_num == port) {
+			port_info->refcnt--;
+			refcnt = port_info->refcnt;
+			/* remove the current lep from the listen list */
+			list_del(&lep->listen_ep_list);
+			if (port_info->refcnt == 0) {
+				/* Remove this entry from the list as there
+				 * are no more listeners for this port_num.
+				 */
+				list_del(&port_info->list);
+				kfree(port_info);
+			}
+			break;
+		}
+	}
+	mutex_unlock(&listen_port_mutex);
+	return refcnt;
+}
+
+/*
+ * Find the lep that belongs to the ifnet on which the SYN frame was received.
+ */
+struct c4iw_listen_ep *
+find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so)
+{
+	struct adapter *adap = NULL;
+	struct c4iw_listen_ep *lep = NULL;
+	struct sockaddr_storage remote = { 0 };
+	struct ifnet *new_conn_ifp = NULL;
+	struct listen_port_info *port_info = NULL;
+	int err = 0, i = 0,
+	    found_portinfo = 0, found_lep = 0;
+	uint16_t port;
+
+	/* STEP 1: get 'ifnet' based on socket's remote address */
+	GET_REMOTE_ADDR(&remote, so);
+
+	err = get_ifnet_from_raddr(&remote, &new_conn_ifp);
+	if (err) {
+		CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, "
+				"master_lep %p err %d",
+				__func__, so, master_lep, err);
+		return (NULL);
+	}
+
+	/* STEP 2: Find 'port_info' with listener local port address. */
+	port = (master_lep->com.local_addr.ss_family == AF_INET) ?
+		((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port :
+		((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port;
+
+
+	mutex_lock(&listen_port_mutex);
+	list_for_each_entry(port_info, &listen_port_list, list)
+		if (port_info->port_num == port) {
+			found_portinfo =1;
+			break;
+		}
+	if (!found_portinfo)
+		goto out;
+
+	/* STEP 3: Traverse through list of lep's that are bound to the current
+	 * TCP port address and find the lep that belongs to the ifnet on which
+	 * the SYN frame was received.
+	 */
+	list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) {
+		adap = lep->com.dev->rdev.adap;
+		for_each_port(adap, i) {
+			if (new_conn_ifp == adap->port[i]->vi[0].ifp) {
+				found_lep =1;
+				goto out;
+			}
+		}
+	}
+out:
+	mutex_unlock(&listen_port_mutex);
+	return found_lep ? lep : (NULL);
+}
+
 static void process_timeout(struct c4iw_ep *ep)
 {
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {0};
 	int abort = 1;
 
-	mutex_lock(&ep->com.mutex);
 	CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
 			ep, ep->hwtid, ep->com.state);
 	set_bit(TIMEDOUT, &ep->com.history);
@@ -221,7 +428,6 @@ static void process_timeout(struct c4iw_ep *ep)
 				, __func__, ep, ep->hwtid, ep->com.state);
 		abort = 0;
 	}
-	mutex_unlock(&ep->com.mutex);
 	if (abort)
 		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
 	c4iw_put_ep(&ep->com);
@@ -273,14 +479,16 @@ process_req(struct work_struct *ctx)
 		ep_events = epc->ep_events;
 		epc->ep_events = 0;
 		spin_unlock_irqrestore(&req_lock, flag);
-		CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__,
-		    epc->so, epc, ep_events);
+		mutex_lock(&epc->mutex);
+		CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x",
+		    __func__, epc->so, epc, states[epc->state], ep_events);
 		if (ep_events & C4IW_EVENT_TERM)
 			process_terminate((struct c4iw_ep *)epc);
 		if (ep_events & C4IW_EVENT_TIMEOUT)
 			process_timeout((struct c4iw_ep *)epc);
 		if (ep_events & C4IW_EVENT_SOCKET)
 			process_socket_event((struct c4iw_ep *)epc);
+		mutex_unlock(&epc->mutex);
 		c4iw_put_ep(epc);
 		process_err_cqes();
 		spin_lock_irqsave(&req_lock, flag);
@@ -321,55 +529,67 @@ done:
 	return (rc);
 
 }
-
 static int
-find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
-		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
+get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp)
 {
-	struct in_addr addr;
-	int err;
+	int err = 0;
 
-	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
-	    peer_ip, ntohs(local_port), ntohs(peer_port));
+	if (raddr->ss_family == AF_INET) {
+		struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr;
+		struct nhop4_extended nh4 = {0};
 
-	addr.s_addr = peer_ip;
-	err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);
+		err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr,
+				NHR_REF, 0, &nh4);
+		*ifp = nh4.nh_ifp;
+		if (err)
+			fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
+	} else {
+		struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr;
+		struct nhop6_extended nh6 = {0};
+		struct in6_addr addr6;
+		uint32_t scopeid;
 
-	CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
+		memset(&addr6, 0, sizeof(addr6));
+		in6_splitscope((struct in6_addr *)&raddr6->sin6_addr,
+					&addr6, &scopeid);
+		err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid,
+				NHR_REF, 0, &nh6);
+		*ifp = nh6.nh_ifp;
+		if (err)
+			fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6);
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err);
 	return err;
 }
 
 static void
 close_socket(struct socket *so)
 {
-
 	uninit_iwarp_socket(so);
-	sodisconnect(so);
+	soclose(so);
 }
 
 static void
 process_peer_close(struct c4iw_ep *ep)
 {
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {0};
 	int disconnect = 1;
 	int release = 0;
 
 	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
 	    ep->com.so, states[ep->com.state]);
 
-	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
 
 		case MPA_REQ_WAIT:
-			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
+			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD",
 			    __func__, ep);
-			__state_set(&ep->com, CLOSING);
-			break;
-
+			/* Fallthrough */
 		case MPA_REQ_SENT:
-			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
+			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD",
 			    __func__, ep);
-			__state_set(&ep->com, DEAD);
+			ep->com.state = DEAD;
 			connect_reply_upcall(ep, -ECONNABORTED);
 
 			disconnect = 0;
@@ -388,21 +608,20 @@ process_peer_close(struct c4iw_ep *ep)
 			 */
 			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
 			    __func__, ep);
-			__state_set(&ep->com, CLOSING);
-			c4iw_get_ep(&ep->com);
+			ep->com.state = CLOSING;
 			break;
 
 		case MPA_REP_SENT:
 			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
 			    __func__, ep);
-			__state_set(&ep->com, CLOSING);
+			ep->com.state = CLOSING;
 			break;
 
 		case FPDU_MODE:
 			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
 			    __func__, ep);
 			START_EP_TIMER(ep);
-			__state_set(&ep->com, CLOSING);
+			ep->com.state = CLOSING;
 			attrs.next_state = C4IW_QP_STATE_CLOSING;
 			c4iw_modify_qp(ep->com.dev, ep->com.qp,
 					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
@@ -418,7 +637,7 @@ process_peer_close(struct c4iw_ep *ep)
 		case CLOSING:
 			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
 			    __func__, ep);
-			__state_set(&ep->com, MORIBUND);
+			ep->com.state = MORIBUND;
 			disconnect = 0;
 			break;
 
@@ -433,7 +652,7 @@ process_peer_close(struct c4iw_ep *ep)
 			}
 			close_socket(ep->com.so);
 			close_complete_upcall(ep, 0);
-			__state_set(&ep->com, DEAD);
+			ep->com.state = DEAD;
 			release = 1;
 			disconnect = 0;
 			break;
@@ -450,7 +669,6 @@ process_peer_close(struct c4iw_ep *ep)
 			break;
 	}
 
-	mutex_unlock(&ep->com.mutex);
 
 	if (disconnect) {
 
@@ -469,11 +687,10 @@ process_peer_close(struct c4iw_ep *ep)
 static void
 process_conn_error(struct c4iw_ep *ep)
 {
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {0};
 	int ret;
 	int state;
 
-	mutex_lock(&ep->com.mutex);
 	state = ep->com.state;
 	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
 	    __func__, ep, ep->com.so, ep->com.so->so_error,
@@ -483,6 +700,7 @@ process_conn_error(struct c4iw_ep *ep)
 
 		case MPA_REQ_WAIT:
 			STOP_EP_TIMER(ep);
+			c4iw_put_ep(&ep->parent_ep->com);
 			break;
 
 		case MPA_REQ_SENT:
@@ -496,13 +714,6 @@ process_conn_error(struct c4iw_ep *ep)
 			break;
 
 		case MPA_REQ_RCVD:
-
-			/*
-			 * We're gonna mark this puppy DEAD, but keep
-			 * the reference on it until the ULP accepts or
-			 * rejects the CR.
-			 */
-			c4iw_get_ep(&ep->com);
 			break;
 
 		case MORIBUND:
@@ -531,7 +742,6 @@ process_conn_error(struct c4iw_ep *ep)
 		case DEAD:
 			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
 			    __func__, ep->com.so->so_error);
-			mutex_unlock(&ep->com.mutex);
 			return;
 
 		default:
@@ -541,10 +751,9 @@ process_conn_error(struct c4iw_ep *ep)
 
 	if (state != ABORTING) {
 		close_socket(ep->com.so);
-		__state_set(&ep->com, DEAD);
+		ep->com.state = DEAD;
 		c4iw_put_ep(&ep->com);
 	}
-	mutex_unlock(&ep->com.mutex);
 	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
 	return;
 }
@@ -552,14 +761,13 @@ process_conn_error(struct c4iw_ep *ep)
 static void
 process_close_complete(struct c4iw_ep *ep)
 {
-	struct c4iw_qp_attributes attrs;
+	struct c4iw_qp_attributes attrs = {0};
 	int release = 0;
 
 	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
 	    ep->com.so, states[ep->com.state]);
 
 	/* The cm_id may be null if we failed to connect */
-	mutex_lock(&ep->com.mutex);
 	set_bit(CLOSE_CON_RPL, &ep->com.history);
 
 	switch (ep->com.state) {
@@ -567,7 +775,7 @@ process_close_complete(struct c4iw_ep *ep)
 		case CLOSING:
 			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
 			    __func__, ep);
-			__state_set(&ep->com, MORIBUND);
+			ep->com.state = MORIBUND;
 			break;
 
 		case MORIBUND:
@@ -588,7 +796,7 @@ process_close_complete(struct c4iw_ep *ep)
 
 			close_socket(ep->com.so);
 			close_complete_upcall(ep, 0);
-			__state_set(&ep->com, DEAD);
+			ep->com.state = DEAD;
 			release = 1;
 			break;
 
@@ -605,12 +813,11 @@ process_close_complete(struct c4iw_ep *ep)
 			panic("%s:pcc6 %p unknown ep state", __func__, ep);
 			break;
 	}
-	mutex_unlock(&ep->com.mutex);
 
 	if (release) {
 
 		CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
-		c4iw_put_ep(&ep->com);
+		release_ep_resources(ep);
 	}
 	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
 	return;
@@ -639,49 +846,56 @@ setiwsockopt(struct socket *so)
 static void
 init_iwarp_socket(struct socket *so, void *arg)
 {
-
-	SOCKBUF_LOCK(&so->so_rcv);
-	soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
-	so->so_state |= SS_NBIO;
-	SOCKBUF_UNLOCK(&so->so_rcv);
+	if (SOLISTENING(so)) {
+		SOLISTEN_LOCK(so);
+		solisten_upcall_set(so, c4iw_so_upcall, arg);
+		so->so_state |= SS_NBIO;
+		SOLISTEN_UNLOCK(so);
+	} else {
+		SOCKBUF_LOCK(&so->so_rcv);
+		soupcall_set(so, SO_RCV, c4iw_so_upcall, arg);
+		so->so_state |= SS_NBIO;
+		SOCKBUF_UNLOCK(&so->so_rcv);
+	}
 }
 
 static void
 uninit_iwarp_socket(struct socket *so)
 {
-
-	SOCKBUF_LOCK(&so->so_rcv);
-	soupcall_clear(so, SO_RCV);
-	SOCKBUF_UNLOCK(&so->so_rcv);
+	if (SOLISTENING(so)) {
+		SOLISTEN_LOCK(so);
+		solisten_upcall_set(so, NULL, NULL);
+		SOLISTEN_UNLOCK(so);
+	} else {
+		SOCKBUF_LOCK(&so->so_rcv);
+		soupcall_clear(so, SO_RCV);
+		SOCKBUF_UNLOCK(&so->so_rcv);
+	}
 }
 
 static void
 process_data(struct c4iw_ep *ep)
 {
-	struct sockaddr_in *local, *remote;
 	int disconnect = 0;
 
 	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
 	    ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
 
-	switch (state_read(&ep->com)) {
+	switch (ep->com.state) {
 	case MPA_REQ_SENT:
 		disconnect = process_mpa_reply(ep);
 		break;
 	case MPA_REQ_WAIT:
-		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
-		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
-		ep->com.local_addr = *local;
-		ep->com.remote_addr = *remote;
-		free(local, M_SONAME);
-		free(remote, M_SONAME);
 		disconnect = process_mpa_request(ep);
+		if (disconnect)
+			/* Referenced in process_newconn() */
+			c4iw_put_ep(&ep->parent_ep->com);
 		break;
 	default:
 		if (sbused(&ep->com.so->so_rcv))
 			log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
 			    "state %d, so %p, so_state 0x%x, sbused %u\n",
-			    __func__, ep, state_read(&ep->com), ep->com.so,
+			    __func__, ep, ep->com.state, ep->com.so,
 			    ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
 		break;
 	}
@@ -705,58 +919,122 @@ process_connected(struct c4iw_ep *ep)
 	return;
 err:
 	close_socket(so);
-	state_set(&ep->com, DEAD);
+	ep->com.state = DEAD;
 	c4iw_put_ep(&ep->com);
 	return;
 }
 
-void
-process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
+static inline int c4iw_zero_addr(struct sockaddr *addr)
 {
-	struct c4iw_ep *child_ep;
-	struct sockaddr_in *local;
-	struct sockaddr_in *remote;
-	struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
+	struct in6_addr *ip6;
+
+	if (addr->sa_family == AF_INET)
+		return IN_ZERONET(
+			ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr));
+	else {
+		ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
+		return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
+				ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
+	}
+}
+
+static inline int c4iw_loopback_addr(struct sockaddr *addr)
+{
+	if (addr->sa_family == AF_INET)
+		return IN_LOOPBACK(
+			ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr));
+	else
+		return IN6_IS_ADDR_LOOPBACK(
+				&((struct sockaddr_in6 *) addr)->sin6_addr);
+}
+
+static inline int c4iw_any_addr(struct sockaddr *addr)
+{
+	return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr);
+}
+
+static void
+process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so)
+{
+	struct c4iw_listen_ep *real_lep = NULL;
+	struct c4iw_ep *new_ep = NULL;
+	struct sockaddr_in *remote = NULL;
 	int ret = 0;
 
-	MPASS(child_so != NULL);
+	MPASS(new_so != NULL);
 
-	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
+	if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr)) {
+		/* Here we need to find the 'real_lep' that belongs to the
+		 * incoming socket's network interface, such that the newly
+		 * created 'ep' can be attached to the real 'lep'.
+		 */
+		real_lep = find_real_listen_ep(master_lep, new_so);
+		if (real_lep == NULL) {
+			CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen "
+					"ep for sock: %p", __func__, new_so);
+			log(LOG_ERR,"%s: Could not find the real listen ep for "
+					"sock: %p\n", __func__, new_so);
+			/* FIXME: properly free the 'new_so' in failure case.
+			 * Use of soabort() and soclose() is not legal
+			 * here (before soaccept()).
+			 */
+			return;
+		}
+	} else /* for Non-Wildcard address, master_lep is always the real_lep */
+		real_lep = master_lep;
 
-	CTR5(KTR_IW_CXGBE,
-	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
-	     __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
+	new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL);
 
-	in_getsockaddr(child_so, (struct sockaddr **)&local);
-	in_getpeeraddr(child_so, (struct sockaddr **)&remote);
+	CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, "
+	    "listening so %p, new so %p", __func__, master_lep, real_lep,
+	    new_ep, master_lep->com.so, new_so);
 
-	child_ep->com.local_addr = *local;
-	child_ep->com.remote_addr = *remote;
-	child_ep->com.dev = parent_ep->com.dev;
-	child_ep->com.so = child_so;
-	child_ep->com.cm_id = NULL;
-	child_ep->com.thread = parent_ep->com.thread;
-	child_ep->parent_ep = parent_ep;
+	new_ep->com.dev = real_lep->com.dev;
+	new_ep->com.so = new_so;
+	new_ep->com.cm_id = NULL;
+	new_ep->com.thread = real_lep->com.thread;
+	new_ep->parent_ep = real_lep;
 
-	free(local, M_SONAME);
+	GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so);
+	GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so);
+	c4iw_get_ep(&real_lep->com);
+	init_timer(&new_ep->timer);
+	new_ep->com.state = MPA_REQ_WAIT;
+	START_EP_TIMER(new_ep);
+
+	setiwsockopt(new_so);
+	ret = soaccept(new_so, (struct sockaddr **)&remote);
+	if (ret != 0) {
+		CTR4(KTR_IW_CXGBE,
+				"%s:listen sock:%p, new sock:%p, ret:%d\n",
+				__func__, master_lep->com.so, new_so, ret);
+		if (remote != NULL)
+			free(remote, M_SONAME);
+		uninit_iwarp_socket(new_so);
+		soclose(new_so);
+		c4iw_put_ep(&new_ep->com);
+		c4iw_put_ep(&real_lep->com);
+		return;
+	}
 	free(remote, M_SONAME);
 
-	setiwsockopt(child_so);
-	init_iwarp_socket(child_so, &child_ep->com);
-	c4iw_get_ep(&parent_ep->com);
-	init_timer(&child_ep->timer);
-	state_set(&child_ep->com, MPA_REQ_WAIT);
-	START_EP_TIMER(child_ep);
+	/* MPA request might have been queued up on the socket already, so we
+	 * initialize the socket/upcall_handler under lock to prevent processing
+	 * MPA request on another thread (via process_req()) simultaneously.
+	 */
+	c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below, this is to
+				      avoid freeing of ep before ep unlock. */
+	mutex_lock(&new_ep->com.mutex);
+	init_iwarp_socket(new_so, &new_ep->com);
 
-	/* maybe the request has already been queued up on the socket... */
-	ret = process_mpa_request(child_ep);
-	if (ret == 2)
+	ret = process_mpa_request(new_ep);
+	if (ret) {
 		/* ABORT */
-		c4iw_ep_disconnect(child_ep, 1, GFP_KERNEL);
-	else if (ret == 1)
-		/* CLOSE */
-		c4iw_ep_disconnect(child_ep, 0, GFP_KERNEL);
-
+		c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL);
+		c4iw_put_ep(&real_lep->com);
+	}
+	mutex_unlock(&new_ep->com.mutex);
+	c4iw_put_ep(&new_ep->com);
 	return;
 }
 
@@ -790,6 +1068,12 @@ c4iw_so_upcall(struct socket *so, void *arg, int waitf
 	    ep->com.entry.tqe_prev);
 
 	MPASS(ep->com.so == so);
+	/*
+	 * Wake up any threads waiting in rdma_init()/rdma_fini(),
+	 * with locks held.
+	 */
+	if (so->so_error)
+		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
 	add_ep_to_req_list(ep, C4IW_EVENT_SOCKET);
 
 	return (SU_OK);
@@ -820,9 +1104,15 @@ terminate(struct sge_iq *iq, const struct rss_header *
 static void
 process_socket_event(struct c4iw_ep *ep)
 {
-	int state = state_read(&ep->com);
+	int state = ep->com.state;
 	struct socket *so = ep->com.so;
 
+	if (ep->com.state == DEAD) {
+		CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded "
+			"ep %p ep_state %s", __func__, ep, states[state]); 
+		return;
+	}
+
 	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
 	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
 	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
@@ -833,10 +1123,29 @@ process_socket_event(struct c4iw_ep *ep)
 	}
 
 	if (state == LISTEN) {
-		/* socket listening events are handled at IWCM */
-		CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
-			    ep->com.state, ep);
-		BUG();
+		struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep;
+		struct socket *listen_so = so, *new_so = NULL;
+		int error = 0;
+
+		SOLISTEN_LOCK(listen_so);
+		do {
+			error = solisten_dequeue(listen_so, &new_so,
+						SOCK_NONBLOCK);
+			if (error) {
+				CTR4(KTR_IW_CXGBE, "%s: lep %p listen_so %p "
+					"error %d", __func__, lep, listen_so,
+					error);
+				return;
+			}
+			process_newconn(lep, new_so);
+
+			/* solisten_dequeue() unlocks on return, so acquire the
+			 * lock again for sol_qlen and for the next iteration.
+			 */
+			SOLISTEN_LOCK(listen_so);
+		} while (listen_so->sol_qlen);
+		SOLISTEN_UNLOCK(listen_so);
+
 		return;
 	}
 
@@ -955,34 +1264,6 @@ stop_ep_timer(struct c4iw_ep *ep)
 	return 1;
 }
 
-static enum
-c4iw_ep_state state_read(struct c4iw_ep_common *epc)
-{
-	enum c4iw_ep_state state;
-
-	mutex_lock(&epc->mutex);
-	state = epc->state;
-	mutex_unlock(&epc->mutex);
-
-	return (state);
-}
-
-static void
-__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
-{
-
-	epc->state = new;
-}
-
-static void
-state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
-{
-
-	mutex_lock(&epc->mutex);
-	__state_set(epc, new);
-	mutex_unlock(&epc->mutex);
-}
-
 static void *
 alloc_ep(int size, gfp_t gfp)
 {
@@ -1059,8 +1340,8 @@ send_mpa_req(struct c4iw_ep *ep)
 	}
 
 	if (mpa_rev_to_use == 2) {
-		mpa->private_data_size +=
-			htons(sizeof(struct mpa_v2_conn_params));
+		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+					    sizeof(struct mpa_v2_conn_params));
 		mpa_v2_params.ird = htons((u16)ep->ird);
 		mpa_v2_params.ord = htons((u16)ep->ord);
 
@@ -1112,7 +1393,7 @@ send_mpa_req(struct c4iw_ep *ep)
 	}
 
 	START_EP_TIMER(ep);
-	state_set(&ep->com, MPA_REQ_SENT);
+	ep->com.state = MPA_REQ_SENT;
 	ep->mpa_attr.initiator = 1;
 	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
 	return 0;
@@ -1155,8 +1436,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const v
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 
 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
-		mpa->private_data_size +=
-			htons(sizeof(struct mpa_v2_conn_params));
+		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
+					    sizeof(struct mpa_v2_conn_params));
 		mpa_v2_params.ird = htons(((u16)ep->ird) |
 				(peer2peer ? MPA_V2_PEER2PEER_MODEL :
 				 0));
@@ -1171,7 +1452,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const v
 
 		if (ep->plen)
 			memcpy(mpa->private_data +
-					sizeof(struct mpa_v2_conn_params), pdata, plen);
+				sizeof(struct mpa_v2_conn_params), pdata, plen);
 		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
 		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
 	} else
@@ -1275,7 +1556,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const vo
 	free(mpa, M_CXGBE);
 
 
-	state_set(&ep->com, MPA_REP_SENT);
+	ep->com.state = MPA_REP_SENT;
 	ep->snd_seq += mpalen;
 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
 			ep->com.thread);
@@ -1332,17 +1613,17 @@ send_abort(struct c4iw_ep *ep)
 	}
 
 	uninit_iwarp_socket(so);
-	sodisconnect(so);
+	soclose(so);
 	set_bit(ABORT_CONN, &ep->com.history);
 
 	/*
 	 * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT
 	 * request it has sent. But the current TOE driver is not propagating
 	 * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work-
-	 * around de-refer 'ep' (which was refered before sending ABORT request)
-	 * here instead of doing it in abort_rpl() handler of iw_cxgbe driver.
+	 * around de-reference 'ep' here instead of doing it in abort_rpl()
+	 * handler (not yet implemented) of iw_cxgbe driver.
 	 */
-	c4iw_put_ep(&ep->com);
+	release_ep_resources(ep);
 
 	return (0);
 }
@@ -1401,6 +1682,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, i
 
 			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
 			/* this means MPA_v2 is used */
+			event.ord = ep->ird;
+			event.ird = ep->ord;
 			event.private_data_len = ep->plen -
 				sizeof(struct mpa_v2_conn_params);
 			event.private_data = ep->mpa_pkt +
@@ -1410,6 +1693,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, i
 
 			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
 			/* this means MPA_v1 is used */
+			event.ord = c4iw_max_read_depth;
+			event.ird = c4iw_max_read_depth;
 			event.private_data_len = ep->plen;
 			event.private_data = ep->mpa_pkt +
 				sizeof(struct mpa_message);
@@ -1451,7 +1736,6 @@ static int connect_request_upcall(struct c4iw_ep *ep)
 	event.local_addr = ep->com.local_addr;
 	event.remote_addr = ep->com.remote_addr;
 	event.provider_data = ep;
-	event.so = ep->com.so;
 
 	if (!ep->tried_with_mpa_v1) {
 		/* this means MPA_v2 is used */
@@ -1473,11 +1757,18 @@ static int connect_request_upcall(struct c4iw_ep *ep)
 	c4iw_get_ep(&ep->com);
 	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
 	    &event);
-	if(ret)
+	if(ret) {
+		CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to"
+			" IWCM, err:%d", __func__, ep, ret);
 		c4iw_put_ep(&ep->com);
+	} else
+		/* Dereference parent_ep only in success case.
+		 * In case of failure, parent_ep is dereferenced by the caller
+		 * of process_mpa_request().
+		 */
+		c4iw_put_ep(&ep->parent_ep->com);
 
 	set_bit(CONNREQ_UPCALL, &ep->com.history);
-	c4iw_put_ep(&ep->parent_ep->com);
 	return ret;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803211857.w2LIvVHM019839>