Date: Wed, 21 Mar 2018 18:57:31 +0000 (UTC) From: Navdeep Parhar <np@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r331318 - projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe Message-ID: <201803211857.w2LIvVHM019839@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: np Date: Wed Mar 21 18:57:31 2018 New Revision: 331318 URL: https://svnweb.freebsd.org/changeset/base/331318 Log: MFC r326169 (cxgbe portion). Sponsored by: Chelsio Communications Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cq.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/device.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/mem.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/provider.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/qp.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/t4.h projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/user.h Directory Properties: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/ (props changed) Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c ============================================================================== --- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:39:29 2018 (r331317) +++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:57:31 2018 (r331318) @@ -46,8 +46,11 @@ __FBSDID("$FreeBSD$"); #include <netinet/in_systm.h> #include <netinet/in_pcb.h> +#include <netinet6/in6_pcb.h> #include <netinet/ip.h> #include <netinet/in_fib.h> +#include <netinet6/in6_fib.h> +#include <netinet6/scope6_var.h> #include <netinet/ip_var.h> #include <netinet/tcp_var.h> #include <netinet/tcp.h> @@ -78,6 +81,8 @@ static struct work_struct c4iw_task; static struct workqueue_struct *c4iw_taskq; static LIST_HEAD(err_cqe_list); static spinlock_t err_cqe_lock; +static LIST_HEAD(listen_port_list); +static DEFINE_MUTEX(listen_port_mutex); static void process_req(struct work_struct *ctx); static void start_ep_timer(struct c4iw_ep *ep); @@ -85,12 +90,7 @@ static int stop_ep_timer(struct c4iw_ep *ep); static int set_tcpinfo(struct c4iw_ep *ep); static void process_timeout(struct c4iw_ep *ep); static void process_err_cqes(void); -static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc); -static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); -static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); static void *alloc_ep(int size, gfp_t flags); -static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, struct nhop4_extended *pnh4); static void close_socket(struct socket *so); static int send_mpa_req(struct c4iw_ep *ep); static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); @@ -120,6 +120,15 @@ static int process_terminate(struct c4iw_ep *ep); static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m); static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events); +static struct listen_port_info * +add_ep_to_listenlist(struct c4iw_listen_ep *lep); +static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep); +static struct c4iw_listen_ep * +find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so); +static int get_ifnet_from_raddr(struct sockaddr_storage *raddr, + struct ifnet **ifp); +static void process_newconn(struct c4iw_listen_ep *master_lep, + struct socket *new_so); #define START_EP_TIMER(ep) \ do { \ CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \ @@ -134,6 +143,34 @@ static int add_ep_to_req_list(struct c4iw_ep *ep, int stop_ep_timer(ep); \ }) +#define GET_LOCAL_ADDR(pladdr, so) \ + do { \ + struct sockaddr_storage *__a = NULL; \ + struct inpcb *__inp = sotoinpcb(so); \ + KASSERT(__inp != NULL, \ + ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ + if (__inp->inp_vflag & INP_IPV4) \ + in_getsockaddr(so, (struct sockaddr **)&__a); \ + else \ + in6_getsockaddr(so, (struct sockaddr **)&__a); \ + *(pladdr) = *__a; \ + free(__a, M_SONAME); \ + } while (0) + +#define GET_REMOTE_ADDR(praddr, so) \ + do { \ + struct sockaddr_storage *__a = NULL; \ + struct inpcb *__inp = sotoinpcb(so); \ + KASSERT(__inp != NULL, \ + ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ + if (__inp->inp_vflag & INP_IPV4) \ + in_getpeeraddr(so, (struct sockaddr **)&__a); \ + else \ + in6_getpeeraddr(so, (struct sockaddr **)&__a); \ + *(praddr) = *__a; \ + free(__a, M_SONAME); \ + } while (0) + #ifdef KTR static char *states[] = { "idle", @@ -152,7 +189,6 @@ static char *states[] = { }; #endif - static void deref_cm_id(struct c4iw_ep_common *epc) { epc->cm_id->rem_ref(epc->cm_id); @@ -179,13 +215,184 @@ static void ref_qp(struct c4iw_ep *ep) set_bit(QP_REFED, &ep->com.history); c4iw_qp_add_ref(&ep->com.qp->ibqp); } +/* allocated per TCP port while listening */ +struct listen_port_info { + uint16_t port_num; /* TCP port address */ + struct list_head list; /* belongs to listen_port_list */ + struct list_head lep_list; /* per port lep list */ + uint32_t refcnt; /* number of lep's listening */ +}; +/* + * Following two lists are used to manage INADDR_ANY listeners: + * 1)listen_port_list + * 2)lep_list + * + * Below is the INADDR_ANY listener lists overview on a system with a two port + * adapter: + * |------------------| + * |listen_port_list | + * |------------------| + * | + * | |-----------| |-----------| + * | | port_num:X| | port_num:X| + * |--------------|-list------|-------|-list------|-------.... + * | lep_list----| | lep_list----| + * | refcnt | | | refcnt | | + * | | | | | | + * | | | | | | + * |-----------| | |-----------| | + * | | + * | | + * | | + * | | lep1 lep2 + * | | |----------------| |----------------| + * | |----| listen_ep_list |----| listen_ep_list | + * | |----------------| |----------------| + * | + * | + * | lep1 lep2 + * | |----------------| |----------------| + * |---| listen_ep_list |----| listen_ep_list | + * |----------------| |----------------| + * + * Because of two port adapter, the number of lep's are two(lep1 & lep2) for + * each TCP port number. + * + * Here 'lep1' is always marked as Master lep, because solisten() is always + * called through first lep. + * + */ +static struct listen_port_info * +add_ep_to_listenlist(struct c4iw_listen_ep *lep) +{ + uint16_t port; + struct listen_port_info *port_info = NULL; + struct sockaddr_storage *laddr = &lep->com.local_addr; + + port = (laddr->ss_family == AF_INET) ? + ((struct sockaddr_in *)laddr)->sin_port : + ((struct sockaddr_in6 *)laddr)->sin6_port; + + mutex_lock(&listen_port_mutex); + + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) + goto found_port; + + port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK); + port_info->port_num = port; + port_info->refcnt = 0; + + list_add_tail(&port_info->list, &listen_port_list); + INIT_LIST_HEAD(&port_info->lep_list); + +found_port: + port_info->refcnt++; + list_add_tail(&lep->listen_ep_list, &port_info->lep_list); + mutex_unlock(&listen_port_mutex); + return port_info; +} + +static int +rem_ep_from_listenlist(struct c4iw_listen_ep *lep) +{ + uint16_t port; + struct listen_port_info *port_info = NULL; + struct sockaddr_storage *laddr = &lep->com.local_addr; + int refcnt = 0; + + port = (laddr->ss_family == AF_INET) ? + ((struct sockaddr_in *)laddr)->sin_port : + ((struct sockaddr_in6 *)laddr)->sin6_port; + + mutex_lock(&listen_port_mutex); + + /* get the port_info structure based on the lep's port address */ + list_for_each_entry(port_info, &listen_port_list, list) { + if (port_info->port_num == port) { + port_info->refcnt--; + refcnt = port_info->refcnt; + /* remove the current lep from the listen list */ + list_del(&lep->listen_ep_list); + if (port_info->refcnt == 0) { + /* Remove this entry from the list as there + * are no more listeners for this port_num. + */ + list_del(&port_info->list); + kfree(port_info); + } + break; + } + } + mutex_unlock(&listen_port_mutex); + return refcnt; +} + +/* + * Find the lep that belongs to the ifnet on which the SYN frame was received. + */ +struct c4iw_listen_ep * +find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so) +{ + struct adapter *adap = NULL; + struct c4iw_listen_ep *lep = NULL; + struct sockaddr_storage remote = { 0 }; + struct ifnet *new_conn_ifp = NULL; + struct listen_port_info *port_info = NULL; + int err = 0, i = 0, + found_portinfo = 0, found_lep = 0; + uint16_t port; + + /* STEP 1: get 'ifnet' based on socket's remote address */ + GET_REMOTE_ADDR(&remote, so); + + err = get_ifnet_from_raddr(&remote, &new_conn_ifp); + if (err) { + CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, " + "master_lep %p err %d", + __func__, so, master_lep, err); + return (NULL); + } + + /* STEP 2: Find 'port_info' with listener local port address. */ + port = (master_lep->com.local_addr.ss_family == AF_INET) ? + ((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port : + ((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port; + + + mutex_lock(&listen_port_mutex); + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) { + found_portinfo =1; + break; + } + if (!found_portinfo) + goto out; + + /* STEP 3: Traverse through list of lep's that are bound to the current + * TCP port address and find the lep that belongs to the ifnet on which + * the SYN frame was received. + */ + list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) { + adap = lep->com.dev->rdev.adap; + for_each_port(adap, i) { + if (new_conn_ifp == adap->port[i]->vi[0].ifp) { + found_lep =1; + goto out; + } + } + } +out: + mutex_unlock(&listen_port_mutex); + return found_lep ? lep : (NULL); +} + static void process_timeout(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int abort = 1; - mutex_lock(&ep->com.mutex); CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__, ep, ep->hwtid, ep->com.state); set_bit(TIMEDOUT, &ep->com.history); @@ -221,7 +428,6 @@ static void process_timeout(struct c4iw_ep *ep) , __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - mutex_unlock(&ep->com.mutex); if (abort) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); @@ -273,14 +479,16 @@ process_req(struct work_struct *ctx) ep_events = epc->ep_events; epc->ep_events = 0; spin_unlock_irqrestore(&req_lock, flag); - CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__, - epc->so, epc, ep_events); + mutex_lock(&epc->mutex); + CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x", + __func__, epc->so, epc, states[epc->state], ep_events); if (ep_events & C4IW_EVENT_TERM) process_terminate((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_TIMEOUT) process_timeout((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_SOCKET) process_socket_event((struct c4iw_ep *)epc); + mutex_unlock(&epc->mutex); c4iw_put_ep(epc); process_err_cqes(); spin_lock_irqsave(&req_lock, flag); @@ -321,55 +529,67 @@ done: return (rc); } - static int -find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, struct nhop4_extended *pnh4) +get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp) { - struct in_addr addr; - int err; + int err = 0; - CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip, - peer_ip, ntohs(local_port), ntohs(peer_port)); + if (raddr->ss_family == AF_INET) { + struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr; + struct nhop4_extended nh4 = {0}; - addr.s_addr = peer_ip; - err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4); + err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr, + NHR_REF, 0, &nh4); + *ifp = nh4.nh_ifp; + if (err) + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); + } else { + struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr; + struct nhop6_extended nh6 = {0}; + struct in6_addr addr6; + uint32_t scopeid; - CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err); + memset(&addr6, 0, sizeof(addr6)); + in6_splitscope((struct in6_addr *)&raddr6->sin6_addr, + &addr6, &scopeid); + err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid, + NHR_REF, 0, &nh6); + *ifp = nh6.nh_ifp; + if (err) + fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6); + } + + CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err); return err; } static void close_socket(struct socket *so) { - uninit_iwarp_socket(so); - sodisconnect(so); + soclose(so); } static void process_peer_close(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int disconnect = 1; int release = 0; CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); - mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: - CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING", + CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD", __func__, ep); - __state_set(&ep->com, CLOSING); - break; - + /* Fallthrough */ case MPA_REQ_SENT: - CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING", + CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD", __func__, ep); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; connect_reply_upcall(ep, -ECONNABORTED); disconnect = 0; @@ -388,21 +608,20 @@ process_peer_close(struct c4iw_ep *ep) */ CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", __func__, ep); - __state_set(&ep->com, CLOSING); - c4iw_get_ep(&ep->com); + ep->com.state = CLOSING; break; case MPA_REP_SENT: CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", __func__, ep); - __state_set(&ep->com, CLOSING); + ep->com.state = CLOSING; break; case FPDU_MODE: CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", __func__, ep); START_EP_TIMER(ep); - __state_set(&ep->com, CLOSING); + ep->com.state = CLOSING; attrs.next_state = C4IW_QP_STATE_CLOSING; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); @@ -418,7 +637,7 @@ process_peer_close(struct c4iw_ep *ep) case CLOSING: CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", __func__, ep); - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; disconnect = 0; break; @@ -433,7 +652,7 @@ process_peer_close(struct c4iw_ep *ep) } close_socket(ep->com.so); close_complete_upcall(ep, 0); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; release = 1; disconnect = 0; break; @@ -450,7 +669,6 @@ process_peer_close(struct c4iw_ep *ep) break; } - mutex_unlock(&ep->com.mutex); if (disconnect) { @@ -469,11 +687,10 @@ process_peer_close(struct c4iw_ep *ep) static void process_conn_error(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int ret; int state; - mutex_lock(&ep->com.mutex); state = ep->com.state; CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", __func__, ep, ep->com.so, ep->com.so->so_error, @@ -483,6 +700,7 @@ process_conn_error(struct c4iw_ep *ep) case MPA_REQ_WAIT: STOP_EP_TIMER(ep); + c4iw_put_ep(&ep->parent_ep->com); break; case MPA_REQ_SENT: @@ -496,13 +714,6 @@ process_conn_error(struct c4iw_ep *ep) break; case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. - */ - c4iw_get_ep(&ep->com); break; case MORIBUND: @@ -531,7 +742,6 @@ process_conn_error(struct c4iw_ep *ep) case DEAD: CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", __func__, ep->com.so->so_error); - mutex_unlock(&ep->com.mutex); return; default: @@ -541,10 +751,9 @@ process_conn_error(struct c4iw_ep *ep) if (state != ABORTING) { close_socket(ep->com.so); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; c4iw_put_ep(&ep->com); } - mutex_unlock(&ep->com.mutex); CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); return; } @@ -552,14 +761,13 @@ process_conn_error(struct c4iw_ep *ep) static void process_close_complete(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int release = 0; CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); /* The cm_id may be null if we failed to connect */ - mutex_lock(&ep->com.mutex); set_bit(CLOSE_CON_RPL, &ep->com.history); switch (ep->com.state) { @@ -567,7 +775,7 @@ process_close_complete(struct c4iw_ep *ep) case CLOSING: CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", __func__, ep); - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; break; case MORIBUND: @@ -588,7 +796,7 @@ process_close_complete(struct c4iw_ep *ep) close_socket(ep->com.so); close_complete_upcall(ep, 0); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; release = 1; break; @@ -605,12 +813,11 @@ process_close_complete(struct c4iw_ep *ep) panic("%s:pcc6 %p unknown ep state", __func__, ep); break; } - mutex_unlock(&ep->com.mutex); if (release) { CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep); - c4iw_put_ep(&ep->com); + release_ep_resources(ep); } CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); return; @@ -639,49 +846,56 @@ setiwsockopt(struct socket *so) static void init_iwarp_socket(struct socket *so, void *arg) { - - SOCKBUF_LOCK(&so->so_rcv); - soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); - so->so_state |= SS_NBIO; - SOCKBUF_UNLOCK(&so->so_rcv); + if (SOLISTENING(so)) { + SOLISTEN_LOCK(so); + solisten_upcall_set(so, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOLISTEN_UNLOCK(so); + } else { + SOCKBUF_LOCK(&so->so_rcv); + soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOCKBUF_UNLOCK(&so->so_rcv); + } } static void uninit_iwarp_socket(struct socket *so) { - - SOCKBUF_LOCK(&so->so_rcv); - soupcall_clear(so, SO_RCV); - SOCKBUF_UNLOCK(&so->so_rcv); + if (SOLISTENING(so)) { + SOLISTEN_LOCK(so); + solisten_upcall_set(so, NULL, NULL); + SOLISTEN_UNLOCK(so); + } else { + SOCKBUF_LOCK(&so->so_rcv); + soupcall_clear(so, SO_RCV); + SOCKBUF_UNLOCK(&so->so_rcv); + } } static void process_data(struct c4iw_ep *ep) { - struct sockaddr_in *local, *remote; int disconnect = 0; CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: disconnect = process_mpa_reply(ep); break; case MPA_REQ_WAIT: - in_getsockaddr(ep->com.so, (struct sockaddr **)&local); - in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); - ep->com.local_addr = *local; - ep->com.remote_addr = *remote; - free(local, M_SONAME); - free(remote, M_SONAME); disconnect = process_mpa_request(ep); + if (disconnect) + /* Refered in process_newconn() */ + c4iw_put_ep(&ep->parent_ep->com); break; default: if (sbused(&ep->com.so->so_rcv)) log(LOG_ERR, "%s: Unexpected streaming data. ep %p, " "state %d, so %p, so_state 0x%x, sbused %u\n", - __func__, ep, state_read(&ep->com), ep->com.so, + __func__, ep, ep->com.state, ep->com.so, ep->com.so->so_state, sbused(&ep->com.so->so_rcv)); break; } @@ -705,58 +919,122 @@ process_connected(struct c4iw_ep *ep) return; err: close_socket(so); - state_set(&ep->com, DEAD); + ep->com.state = DEAD; c4iw_put_ep(&ep->com); return; } -void -process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) +static inline int c4iw_zero_addr(struct sockaddr *addr) { - struct c4iw_ep *child_ep; - struct sockaddr_in *local; - struct sockaddr_in *remote; - struct c4iw_ep *parent_ep = parent_cm_id->provider_data; + struct in6_addr *ip6; + + if (addr->sa_family == AF_INET) + return IN_ZERONET( + ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr)); + else { + ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; + return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | + ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; + } +} + +static inline int c4iw_loopback_addr(struct sockaddr *addr) +{ + if (addr->sa_family == AF_INET) + return IN_LOOPBACK( + ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr)); + else + return IN6_IS_ADDR_LOOPBACK( + &((struct sockaddr_in6 *) addr)->sin6_addr); +} + +static inline int c4iw_any_addr(struct sockaddr *addr) +{ + return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr); +} + +static void +process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so) +{ + struct c4iw_listen_ep *real_lep = NULL; + struct c4iw_ep *new_ep = NULL; + struct sockaddr_in *remote = NULL; int ret = 0; - MPASS(child_so != NULL); + MPASS(new_so != NULL); - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); + if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr)) { + /* Here we need to find the 'real_lep' that belongs to the + * incomming socket's network interface, such that the newly + * created 'ep' can be attached to the real 'lep'. + */ + real_lep = find_real_listen_ep(master_lep, new_so); + if (real_lep == NULL) { + CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen " + "ep for sock: %p", __func__, new_so); + log(LOG_ERR,"%s: Could not find the real listen ep for " + "sock: %p\n", __func__, new_so); + /* FIXME: properly free the 'new_so' in failure case. + * Use of soabort() and soclose() are not legal + * here(before soaccept()). + */ + return; + } + } else /* for Non-Wildcard address, master_lep is always the real_lep */ + real_lep = master_lep; - CTR5(KTR_IW_CXGBE, - "%s: parent so %p, parent ep %p, child so %p, child ep %p", - __func__, parent_ep->com.so, parent_ep, child_so, child_ep); + new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL); - in_getsockaddr(child_so, (struct sockaddr **)&local); - in_getpeeraddr(child_so, (struct sockaddr **)&remote); + CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, " + "listening so %p, new so %p", __func__, master_lep, real_lep, + new_ep, master_lep->com.so, new_so); - child_ep->com.local_addr = *local; - child_ep->com.remote_addr = *remote; - child_ep->com.dev = parent_ep->com.dev; - child_ep->com.so = child_so; - child_ep->com.cm_id = NULL; - child_ep->com.thread = parent_ep->com.thread; - child_ep->parent_ep = parent_ep; + new_ep->com.dev = real_lep->com.dev; + new_ep->com.so = new_so; + new_ep->com.cm_id = NULL; + new_ep->com.thread = real_lep->com.thread; + new_ep->parent_ep = real_lep; - free(local, M_SONAME); + GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so); + GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so); + c4iw_get_ep(&real_lep->com); + init_timer(&new_ep->timer); + new_ep->com.state = MPA_REQ_WAIT; + START_EP_TIMER(new_ep); + + setiwsockopt(new_so); + ret = soaccept(new_so, (struct sockaddr **)&remote); + if (ret != 0) { + CTR4(KTR_IW_CXGBE, + "%s:listen sock:%p, new sock:%p, ret:%d\n", + __func__, master_lep->com.so, new_so, ret); + if (remote != NULL) + free(remote, M_SONAME); + uninit_iwarp_socket(new_so); + soclose(new_so); + c4iw_put_ep(&new_ep->com); + c4iw_put_ep(&real_lep->com); + return; + } free(remote, M_SONAME); - setiwsockopt(child_so); - init_iwarp_socket(child_so, &child_ep->com); - c4iw_get_ep(&parent_ep->com); - init_timer(&child_ep->timer); - state_set(&child_ep->com, MPA_REQ_WAIT); - START_EP_TIMER(child_ep); + /* MPA request might have been queued up on the socket already, so we + * initialize the socket/upcall_handler under lock to prevent processing + * MPA request on another thread(via process_req()) simultaniously. + */ + c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below, this is to + avoid freeing of ep before ep unlock. */ + mutex_lock(&new_ep->com.mutex); + init_iwarp_socket(new_so, &new_ep->com); - /* maybe the request has already been queued up on the socket... */ - ret = process_mpa_request(child_ep); - if (ret == 2) + ret = process_mpa_request(new_ep); + if (ret) { /* ABORT */ - c4iw_ep_disconnect(child_ep, 1, GFP_KERNEL); - else if (ret == 1) - /* CLOSE */ - c4iw_ep_disconnect(child_ep, 0, GFP_KERNEL); - + c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL); + c4iw_put_ep(&real_lep->com); + } + mutex_unlock(&new_ep->com.mutex); + c4iw_put_ep(&new_ep->com); return; } @@ -790,6 +1068,12 @@ c4iw_so_upcall(struct socket *so, void *arg, int waitf ep->com.entry.tqe_prev); MPASS(ep->com.so == so); + /* + * Wake up any threads waiting in rdma_init()/rdma_fini(), + * with locks held. + */ + if (so->so_error) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); add_ep_to_req_list(ep, C4IW_EVENT_SOCKET); return (SU_OK); @@ -820,9 +1104,15 @@ terminate(struct sge_iq *iq, const struct rss_header * static void process_socket_event(struct c4iw_ep *ep) { - int state = state_read(&ep->com); + int state = ep->com.state; struct socket *so = ep->com.so; + if (ep->com.state == DEAD) { + CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded " + "ep %p ep_state %s", __func__, ep, states[state]); + return; + } + CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, " "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state, so->so_error, so->so_rcv.sb_state, ep, states[state]); @@ -833,10 +1123,29 @@ process_socket_event(struct c4iw_ep *ep) } if (state == LISTEN) { - /* socket listening events are handled at IWCM */ - CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__, - ep->com.state, ep); - BUG(); + struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep; + struct socket *listen_so = so, *new_so = NULL; + int error = 0; + + SOLISTEN_LOCK(listen_so); + do { + error = solisten_dequeue(listen_so, &new_so, + SOCK_NONBLOCK); + if (error) { + CTR4(KTR_IW_CXGBE, "%s: lep %p listen_so %p " + "error %d", __func__, lep, listen_so, + error); + return; + } + process_newconn(lep, new_so); + + /* solisten_dequeue() unlocks while return, so aquire + * lock again for sol_qlen and also for next iteration. + */ + SOLISTEN_LOCK(listen_so); + } while (listen_so->sol_qlen); + SOLISTEN_UNLOCK(listen_so); + return; } @@ -955,34 +1264,6 @@ stop_ep_timer(struct c4iw_ep *ep) return 1; } -static enum -c4iw_ep_state state_read(struct c4iw_ep_common *epc) -{ - enum c4iw_ep_state state; - - mutex_lock(&epc->mutex); - state = epc->state; - mutex_unlock(&epc->mutex); - - return (state); -} - -static void -__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) -{ - - epc->state = new; -} - -static void -state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) -{ - - mutex_lock(&epc->mutex); - __state_set(epc, new); - mutex_unlock(&epc->mutex); -} - static void * alloc_ep(int size, gfp_t gfp) { @@ -1059,8 +1340,8 @@ send_mpa_req(struct c4iw_ep *ep) } if (mpa_rev_to_use == 2) { - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); @@ -1112,7 +1393,7 @@ send_mpa_req(struct c4iw_ep *ep) } START_EP_TIMER(ep); - state_set(&ep->com, MPA_REQ_SENT); + ep->com.state = MPA_REQ_SENT; ep->mpa_attr.initiator = 1; CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err); return 0; @@ -1155,8 +1436,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const v if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons(((u16)ep->ird) | (peer2peer ? MPA_V2_PEER2PEER_MODEL : 0)); @@ -1171,7 +1452,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const v if (ep->plen) memcpy(mpa->private_data + - sizeof(struct mpa_v2_conn_params), pdata, plen); + sizeof(struct mpa_v2_conn_params), pdata, plen); CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep, mpa_v2_params.ird, mpa_v2_params.ord, ep->plen); } else @@ -1275,7 +1556,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const vo free(mpa, M_CXGBE); - state_set(&ep->com, MPA_REP_SENT); + ep->com.state = MPA_REP_SENT; ep->snd_seq += mpalen; err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); @@ -1332,17 +1613,17 @@ send_abort(struct c4iw_ep *ep) } uninit_iwarp_socket(so); - sodisconnect(so); + soclose(so); set_bit(ABORT_CONN, &ep->com.history); /* * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT * request it has sent. But the current TOE driver is not propagating * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work- - * around de-refer 'ep' (which was refered before sending ABORT request) - * here instead of doing it in abort_rpl() handler of iw_cxgbe driver. + * around de-refererece 'ep' here instead of doing it in abort_rpl() + * handler(not yet implemented) of iw_cxgbe driver. */ - c4iw_put_ep(&ep->com); + release_ep_resources(ep); return (0); } @@ -1401,6 +1682,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, i CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep); /* this means MPA_v2 is used */ + event.ord = ep->ird; + event.ird = ep->ord; event.private_data_len = ep->plen - sizeof(struct mpa_v2_conn_params); event.private_data = ep->mpa_pkt + @@ -1410,6 +1693,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, i CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep); /* this means MPA_v1 is used */ + event.ord = c4iw_max_read_depth; + event.ird = c4iw_max_read_depth; event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); @@ -1451,7 +1736,6 @@ static int connect_request_upcall(struct c4iw_ep *ep) event.local_addr = ep->com.local_addr; event.remote_addr = ep->com.remote_addr; event.provider_data = ep; - event.so = ep->com.so; if (!ep->tried_with_mpa_v1) { /* this means MPA_v2 is used */ @@ -1473,11 +1757,18 @@ static int connect_request_upcall(struct c4iw_ep *ep) c4iw_get_ep(&ep->com); ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, &event); - if(ret) + if(ret) { + CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to" + " IWCM, err:%d", __func__, ep, ret); c4iw_put_ep(&ep->com); + } else + /* Dereference parent_ep only in success case. + * In case of failure, parent_ep is dereferenced by the caller + * of process_mpa_request(). + */ + c4iw_put_ep(&ep->parent_ep->com); set_bit(CONNREQ_UPCALL, &ep->com.history); - c4iw_put_ep(&ep->parent_ep->com); return ret; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803211857.w2LIvVHM019839>