From owner-svn-src-all@FreeBSD.ORG Thu Oct 17 18:37:25 2013 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTP id A1FD6C48; Thu, 17 Oct 2013 18:37:25 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id 807B728C9; Thu, 17 Oct 2013 18:37:25 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id r9HIbPFh062955; Thu, 17 Oct 2013 18:37:25 GMT (envelope-from np@svn.freebsd.org) Received: (from np@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id r9HIbPWp062953; Thu, 17 Oct 2013 18:37:25 GMT (envelope-from np@svn.freebsd.org) Message-Id: <201310171837.r9HIbPWp062953@svn.freebsd.org> From: Navdeep Parhar Date: Thu, 17 Oct 2013 18:37:25 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r256694 - in head/sys: dev/cxgbe/iw_cxgbe modules/cxgbe modules/cxgbe/iw_cxgbe X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 17 Oct 2013 18:37:25 -0000 Author: np Date: Thu Oct 17 18:37:25 2013 New Revision: 256694 URL: http://svnweb.freebsd.org/changeset/base/256694 Log: iw_cxgbe: iWARP driver for Chelsio T4/T5 chips. This is a straight port of the iw_cxgb4 found in OFED distributions. 
Obtained from: Chelsio Added: head/sys/dev/cxgbe/iw_cxgbe/ head/sys/dev/cxgbe/iw_cxgbe/cm.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/cq.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/device.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/ev.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/id_table.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/mem.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/provider.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/qp.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/resource.c (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/t4.h (contents, props changed) head/sys/dev/cxgbe/iw_cxgbe/user.h (contents, props changed) head/sys/modules/cxgbe/iw_cxgbe/ head/sys/modules/cxgbe/iw_cxgbe/Makefile (contents, props changed) Modified: head/sys/modules/cxgbe/Makefile Added: head/sys/dev/cxgbe/iw_cxgbe/cm.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/dev/cxgbe/iw_cxgbe/cm.c Thu Oct 17 18:37:25 2013 (r256694) @@ -0,0 +1,2458 @@ +/* + * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" + +#ifdef TCP_OFFLOAD +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +struct sge_iq; +struct rss_header; +#include +#include "offload.h" +#include "tom/t4_tom.h" + +#define TOEPCB(so) ((struct toepcb *)(so_sototcpcb((so))->t_toe)) + +#include "iw_cxgbe.h" +#include +#include +#include +#include +#include +#include + +static spinlock_t req_lock; +static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list; +static struct work_struct c4iw_task; +static struct workqueue_struct *c4iw_taskq; +static LIST_HEAD(timeout_list); +static spinlock_t timeout_lock; + +static void process_req(struct work_struct *ctx); +static void start_ep_timer(struct c4iw_ep *ep); +static void stop_ep_timer(struct c4iw_ep *ep); +static int set_tcpinfo(struct c4iw_ep *ep); +static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc); +static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); +static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); +static void *alloc_ep(int size, gfp_t flags); +void __free_ep(struct c4iw_ep_common *epc); +static 
struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos); +static int close_socket(struct c4iw_ep_common *epc, int close); +static int shutdown_socket(struct c4iw_ep_common *epc); +static void abort_socket(struct c4iw_ep *ep); +static void send_mpa_req(struct c4iw_ep *ep); +static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); +static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen); +static void close_complete_upcall(struct c4iw_ep *ep); +static int abort_connection(struct c4iw_ep *ep); +static void peer_close_upcall(struct c4iw_ep *ep); +static void peer_abort_upcall(struct c4iw_ep *ep); +static void connect_reply_upcall(struct c4iw_ep *ep, int status); +static void connect_request_upcall(struct c4iw_ep *ep); +static void established_upcall(struct c4iw_ep *ep); +static void process_mpa_reply(struct c4iw_ep *ep); +static void process_mpa_request(struct c4iw_ep *ep); +static void process_peer_close(struct c4iw_ep *ep); +static void process_conn_error(struct c4iw_ep *ep); +static void process_close_complete(struct c4iw_ep *ep); +static void ep_timeout(unsigned long arg); +static void init_sock(struct c4iw_ep_common *epc); +static void process_data(struct c4iw_ep *ep); +static void process_connected(struct c4iw_ep *ep); +static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep); +static void process_newconn(struct c4iw_ep *parent_ep); +static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag); +static void process_socket_event(struct c4iw_ep *ep); +static void release_ep_resources(struct c4iw_ep *ep); +/* + * Timer wrappers. START_EP_TIMER emits a KTR trace that names the calling + * function and line (__func__ and __LINE__ expand at the call site) and then + * calls start_ep_timer() on the endpoint. + */ +#define START_EP_TIMER(ep) \ + do { \ + CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \ + __func__, __LINE__, (ep)); \ + start_ep_timer(ep); \ + } while (0) +/* Counterpart of START_EP_TIMER: trace the call site, then stop_ep_timer(). */ +#define STOP_EP_TIMER(ep) \ + do { \ + CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \ + __func__, __LINE__, (ep)); \ + stop_ep_timer(ep); \ + 
} while (0) +/* + * Printable endpoint state names, compiled in only when KTR is defined and + * used by the trace calls in this file as states[<state value>]. Presumably + * ordered to match enum c4iw_ep_state -- TODO confirm against the header that + * declares the enum. + */ +#ifdef KTR +static char *states[] = { + "idle", + "listen", + "connecting", + "mpa_wait_req", + "mpa_req_sent", + "mpa_req_rcvd", + "mpa_rep_sent", + "fpdu_mode", + "aborting", + "closing", + "moribund", + "dead", + NULL, +}; +#endif +/* + * Work handler that drains req_list. Each endpoint is unlinked while req_lock + * is held and then handled with the lock released: process_socket_event() is + * run if the endpoint still has a socket, and c4iw_put_ep() is called on it + * (pairing with the c4iw_get_ep() done by c4iw_so_upcall() when it queued the + * endpoint). The lock is retaken before the list is examined again. + * The ctx argument is not used. + */ +static void +process_req(struct work_struct *ctx) +{ + struct c4iw_ep_common *epc; + + spin_lock(&req_lock); + while (!TAILQ_EMPTY(&req_list)) { + epc = TAILQ_FIRST(&req_list); + TAILQ_REMOVE(&req_list, epc, entry); + epc->entry.tqe_prev = NULL; /* NULL tqe_prev is what c4iw_so_upcall() and the free routines test for "not queued" */ + spin_unlock(&req_lock); + if (epc->so) + process_socket_event((struct c4iw_ep *)epc); + c4iw_put_ep(epc); + spin_lock(&req_lock); + } + spin_unlock(&req_lock); +} + +/* + * Copy connection parameters from the socket's TCP/TOE state into the + * endpoint while holding the inpcb write lock: hardware tid (from the toepcb), + * snd_nxt, rcv_nxt, and t_maxseg clamped to a minimum of 128. + * Returns 0 on success, or EINVAL (after logging) if TF_TOE is not set on the + * tcpcb. + * + * XXX: doesn't belong here in the iWARP driver. + * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is + * set. Is this a valid assumption for active open? + */ +static int +set_tcpinfo(struct c4iw_ep *ep) +{ + struct socket *so = ep->com.so; + struct inpcb *inp = sotoinpcb(so); + struct tcpcb *tp; + struct toepcb *toep; + int rc = 0; + + INP_WLOCK(inp); + tp = intotcpcb(inp); + if ((tp->t_flags & TF_TOE) == 0) { + rc = EINVAL; + log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n", + __func__, so, ep); + goto done; + } + toep = TOEPCB(so); + + ep->hwtid = toep->tid; + ep->snd_seq = tp->snd_nxt; + ep->rcv_seq = tp->rcv_nxt; + ep->emss = max(tp->t_maxseg, 128); +done: + INP_WUNLOCK(inp); + return (rc); + +} +/* + * Look up a route to peer_ip with rtalloc() and return the resulting + * iproute.ro_rt pointer. Only peer_ip feeds the lookup; local_ip and the two + * ports are only written to the trace, and tos is not used at all. + */ +static struct rtentry * +find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct route iproute; + struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; + + CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip, + peer_ip, ntohs(local_port), ntohs(peer_port)); + bzero(&iproute, sizeof iproute); + dst->sin_family = AF_INET; + dst->sin_len = sizeof *dst; + dst->sin_addr.s_addr = peer_ip; + + rtalloc(&iproute); + CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, (uint64_t)iproute.ro_rt); + return iproute.ro_rt; +} 
+/* + * Detach the endpoint from its socket: clear the receive upcall under the + * socket lock, then either soclose() the socket (close != 0) or shut it down, + * and finally forget the socket pointer. Returns the soclose()/soshutdown() + * result. + * + * NOTE(review): SHUT_RD is 0 in <sys/socket.h>, so "SHUT_WR | SHUT_RD" is + * numerically SHUT_WR rather than SHUT_RDWR. It is left as is because moving + * to SHUT_RDWR would change runtime behavior; confirm the intent. + */ +static int +close_socket(struct c4iw_ep_common *epc, int close) +{ + struct socket *so = epc->so; + int rc; + /* Trace arguments follow the format string: socket first, then endpoint. */ + CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, so, epc, + states[epc->state]); + + SOCK_LOCK(so); + soupcall_clear(so, SO_RCV); + SOCK_UNLOCK(so); + + if (close) + rc = soclose(so); + else + rc = soshutdown(so, SHUT_WR | SHUT_RD); + epc->so = NULL; + + return (rc); +} +/* + * Shut down only the send side (SHUT_WR) of the endpoint's socket and return + * the soshutdown() result. The socket stays attached to the endpoint. + */ +static int +shutdown_socket(struct c4iw_ep_common *epc) +{ + + CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc, + states[epc->state]); + + return (soshutdown(epc->so, SHUT_WR)); +} +/* + * Set SO_LINGER with a zero linger time on the endpoint's socket. A failure + * to set the option is logged and otherwise ignored. + */ +static void +abort_socket(struct c4iw_ep *ep) +{ + struct sockopt sopt; + int rc; + struct linger l; + + CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so, + states[ep->com.state]); + + l.l_onoff = 1; + l.l_linger = 0; + + /* linger_time of 0 forces RST to be sent */ + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_LINGER; + sopt.sopt_val = (caddr_t)&l; + sopt.sopt_valsize = sizeof l; + sopt.sopt_td = NULL; + rc = sosetopt(ep->com.so, &sopt); + if (rc) { + log(LOG_ERR, "%s: can't set linger to 0, no RST! 
err %d\n", + __func__, rc); + } +} +/* + * Handle the peer closing its side of the connection; process_socket_event() + * calls this when the receive side is marked SBS_CANTRCVMORE and the endpoint + * state is below CLOSING. The state transition is made under ep->com.mutex. + * After the mutex is dropped, c4iw_ep_disconnect() is called unless the state + * handler cleared 'disconnect', and c4iw_put_ep() is called if it set + * 'release'. An unknown state panics. + */ +static void +process_peer_close(struct c4iw_ep *ep) +{ + struct c4iw_qp_attributes attrs; + int disconnect = 1; + int release = 0; + + CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep, + ep->com.so, states[ep->com.state]); + + mutex_lock(&ep->com.mutex); + switch (ep->com.state) { + + case MPA_REQ_WAIT: + CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING", + __func__, ep); + __state_set(&ep->com, CLOSING); + break; + + case MPA_REQ_SENT: + /* NOTE(review): the trace text says CLOSING but the state set below is DEAD. */ + CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING", + __func__, ep); + __state_set(&ep->com, DEAD); + connect_reply_upcall(ep, -ECONNABORTED); + + disconnect = 0; + STOP_EP_TIMER(ep); + close_socket(&ep->com, 0); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + release = 1; + break; + + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. + * NOTE(review): the code below sets CLOSING, not DEAD. + */ + CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", + __func__, ep); + __state_set(&ep->com, CLOSING); + c4iw_get_ep(&ep->com); + break; + + case MPA_REP_SENT: + CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", + __func__, ep); + __state_set(&ep->com, CLOSING); + break; + + case FPDU_MODE: + CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", + __func__, ep); + START_EP_TIMER(ep); + __state_set(&ep->com, CLOSING); + attrs.next_state = C4IW_QP_STATE_CLOSING; + c4iw_modify_qp(ep->com.dev, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + peer_close_upcall(ep); + break; + + case ABORTING: + CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)", + __func__, ep); + disconnect = 0; + break; + + case CLOSING: + CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", + __func__, ep); + __state_set(&ep->com, MORIBUND); + disconnect = 0; + break; + + case MORIBUND: + CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__, + ep); + STOP_EP_TIMER(ep); + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = 
C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + close_socket(&ep->com, 0); + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + disconnect = 0; + break; + + case DEAD: + CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)", + __func__, ep); + disconnect = 0; + break; + + default: + panic("%s: ep %p state %d", __func__, ep, + ep->com.state); + break; + } + + mutex_unlock(&ep->com.mutex); + /* The deferred actions below run after the endpoint mutex is released. */ + if (disconnect) { + + CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep); + c4iw_ep_disconnect(ep, 0, M_NOWAIT); + } + if (release) { + + CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep); + c4iw_put_ep(&ep->com); + } + CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep); + return; +} +/* + * Handle a socket error; process_socket_event() calls this when so_error is + * set. A per-state step runs first (stop the timer, report the failure to the + * connecter, move the QP to ERROR and signal a peer abort, ...). Then, for + * every state except ABORTING and DEAD, the socket is shut down through + * close_socket(), the endpoint is marked DEAD and c4iw_put_ep() is called. + * DEAD returns early; an unknown state panics. + * + * The switch works on a snapshot taken with state_read(), while the opening + * trace indexes states[] with ep->com.state read without the mutex. + */ +static void +process_conn_error(struct c4iw_ep *ep) +{ + struct c4iw_qp_attributes attrs; + int ret; + int state; + + state = state_read(&ep->com); + CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", + __func__, ep, ep->com.so, ep->com.so->so_error, + states[ep->com.state]); + + switch (state) { + + case MPA_REQ_WAIT: + STOP_EP_TIMER(ep); + break; + + case MPA_REQ_SENT: + STOP_EP_TIMER(ep); + connect_reply_upcall(ep, -ECONNRESET); + break; + + case MPA_REP_SENT: + ep->com.rpl_err = ECONNRESET; + CTR1(KTR_IW_CXGBE, "waking up ep %p", ep); + break; + + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. 
+ * NOTE(review): the c4iw_put_ep() after the switch is also + * reached from this case, so the get below is paired with + * that put within this function. + */ + c4iw_get_ep(&ep->com); + break; + + case MORIBUND: + case CLOSING: + STOP_EP_TIMER(ep); + /*FALLTHROUGH*/ + case FPDU_MODE: + + if (ep->com.cm_id && ep->com.qp) { + + attrs.next_state = C4IW_QP_STATE_ERROR; + ret = c4iw_modify_qp(ep->com.qp->rhp, + ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + log(LOG_ERR, + "%s - qp <- error failed!\n", + __func__); + } + peer_abort_upcall(ep); + break; + + case ABORTING: + break; + + case DEAD: + CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", + __func__, ep->com.so->so_error); + return; + + default: + panic("%s: ep %p state %d", __func__, ep, state); + break; + } + /* Common teardown for every state that reaches here, except ABORTING. */ + if (state != ABORTING) { + + CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep); + close_socket(&ep->com, 0); + state_set(&ep->com, DEAD); + c4iw_put_ep(&ep->com); + } + CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); + return; +} +/* + * Handle completion of a close; process_socket_event() calls this when the + * socket has SS_ISDISCONNECTED set. Under ep->com.mutex: CLOSING advances to + * MORIBUND. MORIBUND stops the timer, moves the QP to IDLE when both cm_id + * and qp are set, releases the socket (soclose for an endpoint with a + * parent_ep, shutdown otherwise), issues the close-complete upcall and marks + * the endpoint DEAD. ABORTING does nothing. Any other state, DEAD included, + * panics. c4iw_put_ep() is called after the mutex is dropped if the + * MORIBUND path ran. + */ +static void +process_close_complete(struct c4iw_ep *ep) +{ + struct c4iw_qp_attributes attrs; + int release = 0; + + CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, + ep->com.so, states[ep->com.state]); + + /* The cm_id may be null if we failed to connect */ + mutex_lock(&ep->com.mutex); + + switch (ep->com.state) { + + case CLOSING: + CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", + __func__, ep); + __state_set(&ep->com, MORIBUND); + break; + + case MORIBUND: + CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__, + ep); + STOP_EP_TIMER(ep); + + if ((ep->com.cm_id) && (ep->com.qp)) { + + CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE", + __func__, ep); + attrs.next_state = C4IW_QP_STATE_IDLE; + c4iw_modify_qp(ep->com.dev, + ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + + if (ep->parent_ep) { + + CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep); + close_socket(&ep->com, 1); + } + else { + + CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep); + close_socket(&ep->com, 0); + } + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + + case ABORTING: + 
CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep); + break; + + case DEAD: + default: + CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep); + panic("%s:pcc6 %p DEAD", __func__, ep); + break; + } + mutex_unlock(&ep->com.mutex); + + if (release) { + + CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep); + c4iw_put_ep(&ep->com); + } + CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); + return; +} +/* + * Prepare an endpoint's socket for use by this driver: install + * c4iw_so_upcall() as the receive upcall with the endpoint as its argument, + * mark the socket non-blocking (SS_NBIO), and set TCP_NODELAY. A failure to + * set TCP_NODELAY is logged and otherwise ignored. + */ +static void +init_sock(struct c4iw_ep_common *epc) +{ + int rc; + struct sockopt sopt; + struct socket *so = epc->so; + int on = 1; + + SOCK_LOCK(so); + soupcall_set(so, SO_RCV, c4iw_so_upcall, epc); + so->so_state |= SS_NBIO; + SOCK_UNLOCK(so); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = (caddr_t)&on; + sopt.sopt_valsize = sizeof on; + sopt.sopt_td = NULL; + rc = sosetopt(so, &sopt); + if (rc) { + log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n", + __func__, so, rc); + } +} +/* + * Handle received data according to the endpoint state. MPA_REQ_SENT hands + * off to process_mpa_reply(). MPA_REQ_WAIT first records the socket's local + * and peer addresses in the endpoint (freeing the temporary sockaddrs) and + * then calls process_mpa_request(). In any other state, pending bytes in the + * receive buffer are only reported with a log message. + */ +static void +process_data(struct c4iw_ep *ep) +{ + struct sockaddr_in *local, *remote; + + CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sb_cc %d", __func__, + ep->com.so, ep, states[ep->com.state], ep->com.so->so_rcv.sb_cc); + + switch (state_read(&ep->com)) { + case MPA_REQ_SENT: + process_mpa_reply(ep); + break; + case MPA_REQ_WAIT: + in_getsockaddr(ep->com.so, (struct sockaddr **)&local); + in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); + ep->com.local_addr = *local; + ep->com.remote_addr = *remote; + free(local, M_SONAME); + free(remote, M_SONAME); + process_mpa_request(ep); + break; + default: + if (ep->com.so->so_rcv.sb_cc) + log(LOG_ERR, "%s: Unexpected streaming data. 
" + "ep %p, state %d, so %p, so_state 0x%x, sb_cc %u\n", + __func__, ep, state_read(&ep->com), ep->com.so, + ep->com.so->so_state, ep->com.so->so_rcv.sb_cc); + break; + } +} +/* + * Called from process_socket_event() for an endpoint in CONNECTING state. + * If the socket is connected and has no pending error, the MPA request is + * sent. Otherwise the failure is reported through connect_reply_upcall() + * with the negated so_error, the socket is shut down, the endpoint is marked + * DEAD and c4iw_put_ep() is called. + */ +static void +process_connected(struct c4iw_ep *ep) +{ + + if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) + send_mpa_req(ep); + else { + connect_reply_upcall(ep, -ep->com.so->so_error); + close_socket(&ep->com, 0); + state_set(&ep->com, DEAD); + c4iw_put_ep(&ep->com); + } +} +/* + * Take the first completed connection off the listening socket's so_comp + * queue and accept it. While the accept and socket locks are held, the socket + * is unlinked from its head, referenced, given c4iw_so_upcall() as its receive + * upcall (argument child_ep) and marked non-blocking. + * + * Returns the accepted socket with *remote pointing at the peer address (the + * caller frees it with M_SONAME). Returns NULL if the queue is empty or if + * soaccept() fails; in the failure case the upcall is cleared and the socket + * is closed here, and *remote is left NULL. + */ +static struct socket * +dequeue_socket(struct socket *head, struct sockaddr_in **remote, + struct c4iw_ep *child_ep) +{ + struct socket *so; + int rc; + + ACCEPT_LOCK(); + so = TAILQ_FIRST(&head->so_comp); + if (!so) { + ACCEPT_UNLOCK(); + return (NULL); + } + TAILQ_REMOVE(&head->so_comp, so, so_list); + head->so_qlen--; + SOCK_LOCK(so); + so->so_qstate &= ~SQ_COMP; + so->so_head = NULL; + soref(so); + soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep); + so->so_state |= SS_NBIO; + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + + /* + * soaccept() can fail without storing an address (for example when + * the connection is already gone). The caller dereferences and frees + * *remote, so start from NULL and turn a failure into a NULL return, + * which the caller already handles. + */ + *remote = NULL; + rc = soaccept(so, (struct sockaddr **)remote); + if (rc != 0 || *remote == NULL) { + log(LOG_ERR, "%s: soaccept failed on so %p (%d)\n", __func__, + so, rc); + if (*remote != NULL) { + free(*remote, M_SONAME); + *remote = NULL; + } + /* Same teardown order as close_socket(): upcall off, then close. */ + SOCK_LOCK(so); + soupcall_clear(so, SO_RCV); + SOCK_UNLOCK(so); + soclose(so); + return (NULL); + } + + return (so); +} +/* + * Called from process_socket_event() for an endpoint in LISTEN state. + * Allocates a child endpoint, dequeues and accepts one connection from the + * parent's socket, fills in the child from the parent and the accepted + * socket, takes a reference on the parent, moves the child to MPA_REQ_WAIT + * with its timer running, and then tries to process an MPA request in case + * one has already arrived. Allocation or dequeue failures are logged and the + * function returns without a child. + */ +static void +process_newconn(struct c4iw_ep *parent_ep) +{ + struct socket *child_so; + struct c4iw_ep *child_ep; + struct sockaddr_in *remote; + + child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); + if (!child_ep) { + CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM", + __func__, parent_ep->com.so, parent_ep); + log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__); + return; + } + + child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); + if (!child_so) { + CTR4(KTR_IW_CXGBE, + "%s: parent so %p, parent ep %p, child ep %p, dequeue err", + __func__, parent_ep->com.so, parent_ep, child_ep); + log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__); + __free_ep(&child_ep->com); + return; + + } + + CTR5(KTR_IW_CXGBE, + "%s: parent so %p, parent ep %p, child so %p, child ep %p", + __func__, parent_ep->com.so, parent_ep, child_so, child_ep); + + child_ep->com.local_addr = parent_ep->com.local_addr; 
+ child_ep->com.remote_addr = *remote; + child_ep->com.dev = parent_ep->com.dev; + child_ep->com.so = child_so; + child_ep->com.cm_id = NULL; + child_ep->com.thread = parent_ep->com.thread; + child_ep->parent_ep = parent_ep; + + free(remote, M_SONAME); + c4iw_get_ep(&parent_ep->com); + child_ep->parent_ep = parent_ep; + init_timer(&child_ep->timer); + state_set(&child_ep->com, MPA_REQ_WAIT); + START_EP_TIMER(child_ep); + + /* maybe the request has already been queued up on the socket... */ + process_mpa_request(child_ep); +} + +static int +c4iw_so_upcall(struct socket *so, void *arg, int waitflag) +{ + struct c4iw_ep *ep = arg; + + spin_lock(&req_lock); + + CTR6(KTR_IW_CXGBE, + "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p", + __func__, so, so->so_state, ep, states[ep->com.state], + ep->com.entry.tqe_prev); + + if (ep && ep->com.so && !ep->com.entry.tqe_prev) { + KASSERT(ep->com.so == so, ("%s: XXX review.", __func__)); + c4iw_get_ep(&ep->com); + TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); + queue_work(c4iw_taskq, &c4iw_task); + } + + spin_unlock(&req_lock); + return (SU_OK); +} + +static void +process_socket_event(struct c4iw_ep *ep) +{ + int state = state_read(&ep->com); + struct socket *so = ep->com.so; + + CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, " + "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state, + so->so_error, so->so_rcv.sb_state, ep, states[state]); + + if (state == CONNECTING) { + process_connected(ep); + return; + } + + if (state == LISTEN) { + process_newconn(ep); + return; + } + + /* connection error */ + if (so->so_error) { + process_conn_error(ep); + return; + } + + /* peer close */ + if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { + process_peer_close(ep); + return; + } + + /* close complete */ + if (so->so_state & SS_ISDISCONNECTED) { + process_close_complete(ep); + return; + } + + /* rx data */ + process_data(ep); +} + +SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, 
"iw_cxgbe driver parameters"); + +int db_delay_usecs = 1; +TUNABLE_INT("hw.iw_cxgbe.db_delay_usecs", &db_delay_usecs); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RW, &db_delay_usecs, 0, + "Usecs to delay awaiting db fifo to drain"); + +static int dack_mode = 1; +TUNABLE_INT("hw.iw_cxgbe.dack_mode", &dack_mode); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RW, &dack_mode, 0, + "Delayed ack mode (default = 1)"); + +int c4iw_max_read_depth = 8; +TUNABLE_INT("hw.iw_cxgbe.c4iw_max_read_depth", &c4iw_max_read_depth); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RW, &c4iw_max_read_depth, 0, + "Per-connection max ORD/IRD (default = 8)"); + +static int enable_tcp_timestamps; +TUNABLE_INT("hw.iw_cxgbe.enable_tcp_timestamps", &enable_tcp_timestamps); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RW, &enable_tcp_timestamps, 0, + "Enable tcp timestamps (default = 0)"); + +static int enable_tcp_sack; +TUNABLE_INT("hw.iw_cxgbe.enable_tcp_sack", &enable_tcp_sack); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RW, &enable_tcp_sack, 0, + "Enable tcp SACK (default = 0)"); + +static int enable_tcp_window_scaling = 1; +TUNABLE_INT("hw.iw_cxgbe.enable_tcp_window_scaling", &enable_tcp_window_scaling); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RW, &enable_tcp_window_scaling, 0, + "Enable tcp window scaling (default = 1)"); + +int c4iw_debug = 1; +TUNABLE_INT("hw.iw_cxgbe.c4iw_debug", &c4iw_debug); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RW, &c4iw_debug, 0, + "Enable debug logging (default = 0)"); + +static int peer2peer; +TUNABLE_INT("hw.iw_cxgbe.peer2peer", &peer2peer); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RW, &peer2peer, 0, + "Support peer2peer ULPs (default = 0)"); + +static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; +TUNABLE_INT("hw.iw_cxgbe.p2p_type", &p2p_type); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RW, &p2p_type, 0, + 
"RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)"); + +static int ep_timeout_secs = 60; +TUNABLE_INT("hw.iw_cxgbe.ep_timeout_secs", &ep_timeout_secs); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0, + "CM Endpoint operation timeout in seconds (default = 60)"); + +static int mpa_rev = 1; +TUNABLE_INT("hw.iw_cxgbe.mpa_rev", &mpa_rev); +#ifdef IW_CM_MPAV2 +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0, + "MPA Revision, 0 supports amso1100, 1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)"); +#else +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0, + "MPA Revision, 0 supports amso1100, 1 is RFC0544 spec compliant (default = 1)"); +#endif + +static int markers_enabled; +TUNABLE_INT("hw.iw_cxgbe.markers_enabled", &markers_enabled); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0, + "Enable MPA MARKERS (default(0) = disabled)"); + +static int crc_enabled = 1; +TUNABLE_INT("hw.iw_cxgbe.crc_enabled", &crc_enabled); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0, + "Enable MPA CRC (default(1) = enabled)"); + +static int rcv_win = 256 * 1024; +TUNABLE_INT("hw.iw_cxgbe.rcv_win", &rcv_win); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0, + "TCP receive window in bytes (default = 256KB)"); + +static int snd_win = 128 * 1024; +TUNABLE_INT("hw.iw_cxgbe.snd_win", &snd_win); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0, + "TCP send window in bytes (default = 128KB)"); + +int db_fc_threshold = 2000; +TUNABLE_INT("hw.iw_cxgbe.db_fc_threshold", &db_fc_threshold); +SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RW, &db_fc_threshold, 0, + "QP count/threshold that triggers automatic"); + +static void +start_ep_timer(struct c4iw_ep *ep) +{ + + if (timer_pending(&ep->timer)) { + CTR2(KTR_IW_CXGBE, "%s: ep %p, already 
started", __func__, ep); + printk(KERN_ERR "%s timer already started! ep %p\n", __func__, + ep); + return; + } + clear_bit(TIMEOUT, &ep->com.flags); + c4iw_get_ep(&ep->com); + ep->timer.expires = jiffies + ep_timeout_secs * HZ; + ep->timer.data = (unsigned long)ep; + ep->timer.function = ep_timeout; + add_timer(&ep->timer); +} +/* + * Stop the endpoint timer and wait for a running handler to finish + * (del_timer_sync). If the TIMEOUT flag was not already set, it is set now + * and c4iw_put_ep() is called, pairing with the c4iw_get_ep() in + * start_ep_timer(). + */ +static void +stop_ep_timer(struct c4iw_ep *ep) +{ + + del_timer_sync(&ep->timer); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + c4iw_put_ep(&ep->com); + } +} +/* Return the endpoint state, read while holding the endpoint mutex. */ +static enum +c4iw_ep_state state_read(struct c4iw_ep_common *epc) +{ + enum c4iw_ep_state state; + + mutex_lock(&epc->mutex); + state = epc->state; + mutex_unlock(&epc->mutex); + + return (state); +} +/* + * Store a new endpoint state without taking the mutex. The callers in this + * file either hold epc->mutex already or go through state_set(). + */ +static void +__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) +{ + + epc->state = new; +} +/* Store a new endpoint state while holding the endpoint mutex. */ +static void +state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) +{ + + mutex_lock(&epc->mutex); + __state_set(epc, new); + mutex_unlock(&epc->mutex); +} +/* + * Allocate a zeroed endpoint of 'size' bytes with kzalloc() and initialize + * the common part: reference count, mutex and wr_wait. Returns NULL if the + * allocation fails. + */ +static void * +alloc_ep(int size, gfp_t gfp) +{ + struct c4iw_ep_common *epc; + + epc = kzalloc(size, gfp); + if (epc == NULL) + return (NULL); + + kref_init(&epc->kref); + mutex_init(&epc->mutex); + c4iw_init_wr_wait(&epc->wr_wait); + + return (epc); +} +/* + * Free an endpoint directly, without going through the reference count. The + * endpoint must have no socket and must not be on req_list (both asserted). + * The memory comes from kzalloc() in alloc_ep(), so it is released with + * kfree(), the same call _c4iw_free_ep() uses, instead of free() with an + * unrelated malloc type. + */ +void +__free_ep(struct c4iw_ep_common *epc) +{ + CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc); + KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so)); + KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc)); + kfree(epc); + /* Only the pointer value is traced below; the memory is not touched. */ + CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc); +} +/* + * kref release callback: recover the endpoint from its embedded kref, assert + * that it has no socket and is not on req_list, and free it. + */ +void _c4iw_free_ep(struct kref *kref) +{ + struct c4iw_ep *ep; + struct c4iw_ep_common *epc; + + ep = container_of(kref, struct c4iw_ep, com.kref); + epc = &ep->com; + KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so)); + KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list", + __func__, epc)); + kfree(ep); +} +/* Set the RELEASE_RESOURCES flag on the endpoint and call c4iw_put_ep(). */ +static void release_ep_resources(struct c4iw_ep *ep) +{ + CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep); + 
set_bit(RELEASE_RESOURCES, &ep->com.flags); + c4iw_put_ep(&ep->com); + CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep); +} + +static void +send_mpa_req(struct c4iw_ep *ep) +{ + int mpalen; + struct mpa_message *mpa; + struct mpa_v2_conn_params mpa_v2_params; + struct mbuf *m; + char mpa_rev_to_use = mpa_rev; + int err; + + if (ep->retry_with_mpa_v1) + mpa_rev_to_use = 1; + mpalen = sizeof(*mpa) + ep->plen; + if (mpa_rev_to_use == 2) + mpalen += sizeof(struct mpa_v2_conn_params); + + if (mpalen > MHLEN) + CXGBE_UNIMPLEMENTED(__func__); + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + connect_reply_upcall(ep, -ENOMEM); + return; + } + + mpa = mtod(m, struct mpa_message *); + m->m_len = mpalen; + m->m_pkthdr.len = mpalen; + memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); + mpa->flags = (crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0) | + (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); + mpa->private_data_size = htons(ep->plen); + mpa->revision = mpa_rev_to_use; + + if (mpa_rev_to_use == 1) { + ep->tried_with_mpa_v1 = 1; + ep->retry_with_mpa_v1 = 0; + } *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***