From owner-svn-src-head@freebsd.org Tue Feb 28 19:27:43 2017 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 00790CF124A; Tue, 28 Feb 2017 19:27:42 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id AE8B4C28; Tue, 28 Feb 2017 19:27:42 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v1SJRf8u099758; Tue, 28 Feb 2017 19:27:41 GMT (envelope-from np@FreeBSD.org) Received: (from np@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v1SJRfok099754; Tue, 28 Feb 2017 19:27:41 GMT (envelope-from np@FreeBSD.org) Message-Id: <201702281927.v1SJRfok099754@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: np set sender to np@FreeBSD.org using -f From: Navdeep Parhar Date: Tue, 28 Feb 2017 19:27:41 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r314400 - in head/sys: dev/cxgbe/iw_cxgbe ofed/drivers/infiniband/core X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 28 Feb 2017 19:27:43 -0000 Author: np Date: Tue Feb 28 19:27:41 2017 New Revision: 314400 URL: https://svnweb.freebsd.org/changeset/base/314400 Log: cxgbe/iw_cxgbe: fix various double-close panics with iWARP sockets. Sockets representing the TCP endpoints for iWARP connections are allocated by the ibcore module. Before this revision they were closed either by the ibcore module or the iw_cxgbe hardware driver depending on the state transitions during connection teardown. This is error prone and there were cases where both iw_cxgbe and ibcore closed the socket leading to double-free panics. The fix is to let ibcore close the sockets it creates and never do it in the driver. - Use sodisconnect instead of soclose (preceded by solinger = 0) in the driver to tear down an RDMA connection abruptly. This does what's intended without releasing the socket's fd reference. - Close the socket in ibcore when the iWARP iw_cm_id is destroyed. This works for all kinds of sockets: clients that initiate connections, listeners, and sockets accepted off of listeners. Reviewed by: Steve Wise @ Open Grid Computing, hselasky@ MFC after: 3 days Sponsored by: Chelsio Communications Differential Revision: https://reviews.freebsd.org/D9796 Modified: head/sys/dev/cxgbe/iw_cxgbe/cm.c head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h head/sys/ofed/drivers/infiniband/core/cma.c head/sys/ofed/drivers/infiniband/core/iwcm.c Modified: head/sys/dev/cxgbe/iw_cxgbe/cm.c ============================================================================== --- head/sys/dev/cxgbe/iw_cxgbe/cm.c Tue Feb 28 19:01:59 2017 (r314399) +++ head/sys/dev/cxgbe/iw_cxgbe/cm.c Tue Feb 28 19:27:41 2017 (r314400) @@ -92,9 +92,7 @@ static void *alloc_ep(int size, gfp_t fl void __free_ep(struct c4iw_ep_common *epc); static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, __be16 peer_port, u8 tos, struct nhop4_extended *pnh4); -static int close_socket(struct c4iw_ep_common *epc, int close); -static int shutdown_socket(struct c4iw_ep_common *epc); -static void abort_socket(struct c4iw_ep *ep); +static void close_socket(struct socket *so); static int send_mpa_req(struct c4iw_ep *ep); static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen); @@ -111,7 +109,8 @@ static void process_peer_close(struct c4 static void process_conn_error(struct c4iw_ep *ep); static void process_close_complete(struct c4iw_ep *ep); static void ep_timeout(unsigned long arg); -static void init_sock(struct c4iw_ep_common *epc); +static void init_iwarp_socket(struct socket *so, void *arg); +static void uninit_iwarp_socket(struct socket *so); static void process_data(struct c4iw_ep *ep); static void process_connected(struct c4iw_ep *ep); static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag); @@ -319,87 +318,12 @@ find_route(__be32 local_ip, __be32 peer_ return err; } -static int -close_socket(struct c4iw_ep_common *epc, int close) -{ - struct socket *so = epc->so; - int rc; - - CTR5(KTR_IW_CXGBE, "%s:csoB so %p, ep %p, state %s, tid %d", __func__, - so, epc, states[epc->state], - ((struct c4iw_ep *)epc)->hwtid); - mutex_lock(&epc->so_mutex); - if ((so == NULL) || (so->so_count == 0)) { - mutex_unlock(&epc->so_mutex); - CTR5(KTR_IW_CXGBE, "%s:cso1 so %p, ep %p, state %s, tid %d", - __func__, so, epc, states[epc->state], - ((struct c4iw_ep *)epc)->hwtid); - return -EINVAL; - } - - SOCK_LOCK(so); - soupcall_clear(so, SO_RCV); - SOCK_UNLOCK(so); - - if (close) - rc = soclose(so); - else - rc = soshutdown(so, SHUT_WR | SHUT_RD); - epc->so = NULL; - - mutex_unlock(&epc->so_mutex); - return (rc); -} - -static int -shutdown_socket(struct c4iw_ep_common *epc) -{ - - struct socket *so = epc->so; - int rc; - - CTR5(KTR_IW_CXGBE, "%s:ssoB so %p, ep %p, state %s, tid %d", __func__, - epc->so, epc, states[epc->state], - ((struct c4iw_ep *)epc)->hwtid); - mutex_lock(&epc->so_mutex); - if ((so == NULL) || (so->so_count == 0)) { - mutex_unlock(&epc->so_mutex); - CTR5(KTR_IW_CXGBE, "%s:sso1 so %p, ep %p, state %s, tid %d", - __func__, epc->so, epc, states[epc->state], - ((struct c4iw_ep *)epc)->hwtid); - return -EINVAL; - } - rc = soshutdown(so, SHUT_WR); - mutex_unlock(&epc->so_mutex); - return rc; -} - static void -abort_socket(struct c4iw_ep *ep) +close_socket(struct socket *so) { - struct sockopt sopt; - int rc; - struct linger l; - - CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep, - ep->com.so, states[ep->com.state], ep->hwtid); - mutex_lock(&ep->com.so_mutex); - l.l_onoff = 1; - l.l_linger = 0; - /* linger_time of 0 forces RST to be sent */ - sopt.sopt_dir = SOPT_SET; - sopt.sopt_level = SOL_SOCKET; - sopt.sopt_name = SO_LINGER; - sopt.sopt_val = (caddr_t)&l; - sopt.sopt_valsize = sizeof l; - sopt.sopt_td = NULL; - rc = sosetopt(ep->com.so, &sopt); - if (rc) { - log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n", - __func__, rc); - } - mutex_unlock(&ep->com.so_mutex); + uninit_iwarp_socket(so); + sodisconnect(so); } static void @@ -429,7 +353,7 @@ process_peer_close(struct c4iw_ep *ep) disconnect = 0; STOP_EP_TIMER(ep); - close_socket(&ep->com, 0); + close_socket(ep->com.so); deref_cm_id(&ep->com); release = 1; break; @@ -486,7 +410,7 @@ process_peer_close(struct c4iw_ep *ep) c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_socket(&ep->com, 0); + close_socket(ep->com.so); close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; @@ -595,14 +519,7 @@ process_conn_error(struct c4iw_ep *ep) } if (state != ABORTING) { - if (ep->parent_ep) { - CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep); - close_socket(&ep->com, 1); - } else { - CTR2(KTR_IW_CXGBE, "%s:pce2 %p", __func__, ep); - close_socket(&ep->com, 0); - } - + close_socket(ep->com.so); __state_set(&ep->com, DEAD); c4iw_put_ep(&ep->com); } @@ -648,16 +565,7 @@ process_close_complete(struct c4iw_ep *e &attrs, 1); } - if (ep->parent_ep) { - - CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep); - close_socket(&ep->com, 1); - } - else { - - CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep); - close_socket(&ep->com, 0); - } + close_socket(ep->com.so); close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; @@ -688,23 +596,15 @@ process_close_complete(struct c4iw_ep *e } static void -init_sock(struct c4iw_ep_common *epc) +init_iwarp_socket(struct socket *so, void *arg) { int rc; struct sockopt sopt; - struct socket *so = epc->so; int on = 1; - mutex_lock(&epc->so_mutex); - if ((so == NULL) || (so->so_count == 0)) { - mutex_unlock(&epc->so_mutex); - CTR5(KTR_IW_CXGBE, "%s:iso1 so %p, ep %p, state %s, tid %d", - __func__, so, epc, states[epc->state], - ((struct c4iw_ep *)epc)->hwtid); - return; - } + /* Note that SOCK_LOCK(so) is same as SOCKBUF_LOCK(&so->so_rcv) */ SOCK_LOCK(so); - soupcall_set(so, SO_RCV, c4iw_so_upcall, epc); + soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); so->so_state |= SS_NBIO; SOCK_UNLOCK(so); sopt.sopt_dir = SOPT_SET; @@ -718,7 +618,15 @@ init_sock(struct c4iw_ep_common *epc) log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n", __func__, so, rc); } - mutex_unlock(&epc->so_mutex); +} + +static void +uninit_iwarp_socket(struct socket *so) +{ + + SOCKBUF_LOCK(&so->so_rcv); + soupcall_clear(so, SO_RCV); + SOCKBUF_UNLOCK(&so->so_rcv); } static void @@ -759,18 +667,18 @@ process_data(struct c4iw_ep *ep) static void process_connected(struct c4iw_ep *ep) { + struct socket *so = ep->com.so; - if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) { + if ((so->so_state & SS_ISCONNECTED) && !so->so_error) { if (send_mpa_req(ep)) goto err; - } - else { - connect_reply_upcall(ep, -ep->com.so->so_error); + } else { + connect_reply_upcall(ep, -so->so_error); goto err; } return; err: - close_socket(&ep->com, 0); + close_socket(so); state_set(&ep->com, DEAD); c4iw_put_ep(&ep->com); return; @@ -785,23 +693,9 @@ process_newconn(struct iw_cm_id *parent_ struct c4iw_ep *parent_ep = parent_cm_id->provider_data; int ret = 0; - if (!child_so) { - CTR4(KTR_IW_CXGBE, - "%s: parent so %p, parent ep %p, child so %p, invalid so", - __func__, parent_ep->com.so, parent_ep, child_so); - log(LOG_ERR, "%s: invalid child socket\n", __func__); - return; - } - child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); - if (!child_ep) { - CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM", - __func__, parent_ep->com.so, parent_ep); - log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__); - return; - } - SOCKBUF_LOCK(&child_so->so_rcv); - soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep); - SOCKBUF_UNLOCK(&child_so->so_rcv); + MPASS(child_so != NULL); + + child_ep = alloc_ep(sizeof(*child_ep), M_WAITOK); CTR5(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, child so %p, child ep %p", @@ -821,6 +715,7 @@ process_newconn(struct iw_cm_id *parent_ free(local, M_SONAME); free(remote, M_SONAME); + init_iwarp_socket(child_so, &child_ep->com); c4iw_get_ep(&parent_ep->com); init_timer(&child_ep->timer); state_set(&child_ep->com, MPA_REQ_WAIT); @@ -1038,7 +933,6 @@ alloc_ep(int size, gfp_t gfp) kref_init(&epc->kref); mutex_init(&epc->mutex); - mutex_init(&epc->so_mutex); c4iw_init_wr_wait(&epc->wr_wait); return (epc); @@ -1359,30 +1253,47 @@ static void close_complete_upcall(struct CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep); } -static int send_abort(struct c4iw_ep *ep) +static int +send_abort(struct c4iw_ep *ep) { - int err; + struct socket *so = ep->com.so; + struct sockopt sopt; + int rc; + struct linger l; - CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep); - abort_socket(ep); + CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep, so, + states[ep->com.state], ep->hwtid); - /* - * Since socket options were set as l_onoff=1 and l_linger=0 in in - * abort_socket, invoking soclose here sends a RST (reset) to the peer. - */ - err = close_socket(&ep->com, 1); + l.l_onoff = 1; + l.l_linger = 0; + + /* linger_time of 0 forces RST to be sent */ + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_LINGER; + sopt.sopt_val = (caddr_t)&l; + sopt.sopt_valsize = sizeof l; + sopt.sopt_td = NULL; + rc = sosetopt(so, &sopt); + if (rc != 0) { + log(LOG_ERR, "%s: sosetopt(%p, linger = 0) failed with %d.\n", + __func__, so, rc); + } + + uninit_iwarp_socket(so); + sodisconnect(so); set_bit(ABORT_CONN, &ep->com.history); - CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep); /* - * TBD: iw_cgbe driver should receive ABORT reply for every ABORT + * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT * request it has sent. But the current TOE driver is not propagating * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work- * around de-refer 'ep' (which was refered before sending ABORT request) * here instead of doing it in abort_rpl() handler of iw_cxgbe driver. */ c4iw_put_ep(&ep->com); - return err; + + return (0); } static void peer_close_upcall(struct c4iw_ep *ep) @@ -2213,7 +2124,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep = NULL; struct nhop4_extended nh4; - struct toedev *tdev; CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); @@ -2266,8 +2176,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, ep->com.thread = curthread; ep->com.so = cm_id->so; - init_sock(&ep->com); - /* find a route */ err = find_route( cm_id->local_addr.sin_addr.s_addr, @@ -2283,22 +2191,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, goto fail2; } - if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE)) { - - CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep); - printf("%s - interface not TOE capable.\n", __func__); - close_socket(&ep->com, 0); + if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE) || + TOEDEV(nh4.nh_ifp) == NULL) { err = -ENOPROTOOPT; goto fail3; } - tdev = TOEDEV(nh4.nh_ifp); - - if (tdev == NULL) { - - CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep); - printf("%s - No toedev for interface.\n", __func__); - goto fail3; - } fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); state_set(&ep->com, CONNECTING); @@ -2309,19 +2206,18 @@ int c4iw_connect(struct iw_cm_id *cm_id, ep->com.thread); if (!err) { - CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep); + init_iwarp_socket(cm_id->so, &ep->com); goto out; } else { - close_socket(&ep->com, 0); goto fail2; } fail3: - CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep); fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); fail2: deref_cm_id(&ep->com); c4iw_put_ep(&ep->com); + ep = NULL; /* CTR shouldn't display already-freed ep. */ out: CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep); return err; @@ -2465,7 +2361,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *e if (!ep->parent_ep) __state_set(&ep->com, MORIBUND); - ret = shutdown_socket(&ep->com); + ret = sodisconnect(ep->com.so); } if (ret) { Modified: head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h ============================================================================== --- head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h Tue Feb 28 19:01:59 2017 (r314399) +++ head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h Tue Feb 28 19:27:41 2017 (r314400) @@ -754,7 +754,6 @@ struct c4iw_ep_common { int rpl_done; struct thread *thread; struct socket *so; - struct mutex so_mutex; }; struct c4iw_listen_ep { Modified: head/sys/ofed/drivers/infiniband/core/cma.c ============================================================================== --- head/sys/ofed/drivers/infiniband/core/cma.c Tue Feb 28 19:01:59 2017 (r314399) +++ head/sys/ofed/drivers/infiniband/core/cma.c Tue Feb 28 19:27:41 2017 (r314400) @@ -1072,8 +1072,6 @@ static void cma_release_port(struct rdma kfree(bind_list); } mutex_unlock(&lock); - if (id_priv->sock) - sock_release(id_priv->sock); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) Modified: head/sys/ofed/drivers/infiniband/core/iwcm.c ============================================================================== --- head/sys/ofed/drivers/infiniband/core/iwcm.c Tue Feb 28 19:01:59 2017 (r314399) +++ head/sys/ofed/drivers/infiniband/core/iwcm.c Tue Feb 28 19:27:41 2017 (r314400) @@ -528,24 +528,15 @@ iw_init_sock(struct iw_cm_id *cm_id) } static int -iw_close_socket(struct iw_cm_id *cm_id, int close) +iw_uninit_socket(struct iw_cm_id *cm_id) { struct socket *so = cm_id->so; - int rc; - SOCK_LOCK(so); soupcall_clear(so, SO_RCV); SOCK_UNLOCK(so); - if (close) - rc = soclose(so); - else - rc = soshutdown(so, SHUT_WR | SHUT_RD); - - cm_id->so = NULL; - - return rc; + return (0); } static int @@ -554,18 +545,17 @@ iw_create_listen(struct iw_cm_id *cm_id, int rc; iw_init_sock(cm_id); - rc = solisten(cm_id->so, backlog, curthread); + rc = -solisten(cm_id->so, backlog, curthread); if (rc != 0) - iw_close_socket(cm_id, 0); - return rc; + iw_uninit_socket(cm_id); + return (rc); } static int iw_destroy_listen(struct iw_cm_id *cm_id) { - int rc; - rc = iw_close_socket(cm_id, 0); - return rc; + + return (iw_uninit_socket(cm_id)); } @@ -663,6 +653,9 @@ void iw_destroy_cm_id(struct iw_cm_id *c wait_for_completion(&cm_id_priv->destroy_comp); + if (cm_id->so) + sock_release(cm_id->so); + free_cm_id(cm_id_priv); } EXPORT_SYMBOL(iw_destroy_cm_id);