From owner-p4-projects@FreeBSD.ORG Wed May 30 05:57:42 2007 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 38FA016A46C; Wed, 30 May 2007 05:57:42 +0000 (UTC) X-Original-To: perforce@freebsd.org Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id D811C16A46B for ; Wed, 30 May 2007 05:57:41 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [69.147.83.41]) by mx1.freebsd.org (Postfix) with ESMTP id C7A5813C45D for ; Wed, 30 May 2007 05:57:41 +0000 (UTC) (envelope-from kmacy@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.8/8.13.8) with ESMTP id l4U5vf0G053590 for ; Wed, 30 May 2007 05:57:41 GMT (envelope-from kmacy@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.8/8.13.8/Submit) id l4U5vfRb053581 for perforce@freebsd.org; Wed, 30 May 2007 05:57:41 GMT (envelope-from kmacy@freebsd.org) Date: Wed, 30 May 2007 05:57:41 GMT Message-Id: <200705300557.l4U5vfRb053581@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to kmacy@freebsd.org using -f From: Kip Macy To: Perforce Change Reviews Cc: Subject: PERFORCE change 120595 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 30 May 2007 05:57:42 -0000 http://perforce.freebsd.org/chv.cgi?CH=120595 Change 120595 by kmacy@kmacy_vt-x:opentoe_init on 2007/05/30 05:56:44 initial cut of protosw hooking of TCP offload module Affected files ... .. //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_socket.c#2 edit .. //depot/projects/opentoe/sys/modules/cxgb/t3_tom/Makefile#3 edit Differences ... 
==== //depot/projects/opentoe/sys/dev/cxgb/ulp/t3_tom/t3_cpl_socket.c#2 (text+ko) ==== @@ -9,11 +9,7 @@ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - 3. Neither the name of the Chelsio Corporation nor the names of its + 2. Neither the name of the Chelsio Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. @@ -31,6 +27,473 @@ ***************************************************************************/ +#include "opt_ddb.h" +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_tcpdebug.h" + +#include +#include +#include +#include +#include +#include +#ifdef INET6 +#include +#endif /* INET6 */ +#include +#include +#include +#include +#include + +#ifdef DDB +#include +#endif + +#include +#include + +#include +#include +#ifdef INET6 +#include +#endif +#include +#ifdef INET6 +#include +#endif +#include +#include +#ifdef INET6 +#include +#include +#endif +#include +#include +#include +#include +#include +#include +#ifdef TCPDEBUG +#include +#endif + +#ifdef TCPDEBUG +#define TCPDEBUG0 int ostate = 0 +#define TCPDEBUG1() ostate = tp ? 
tp->t_state : 0 +#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \ + tcp_trace(TA_USER, ostate, tp, 0, 0, req) +#else +#define TCPDEBUG0 +#define TCPDEBUG1() +#define TCPDEBUG2(req) +#endif + +static int (*tcp_usr_bind)(struct socket *so, struct sockaddr *nam, struct thread *td); +static int (*tcp_usr_attach)(struct socket *so, int proto, struct thread *td); + +static int +chelsio_ip_ctloutput(struct socket *so, struct sockopt *sopt) +{ + struct inpcb *inp = sotoinpcb(so); + int error, optval; + + error = optval = 0; + + if (sopt->sopt_name == IP_TOS) { + switch (sopt->sopt_dir) { + case SOPT_SET: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (inp->inp_ip_tos != optval) { + inp->inp_ip_tos = optval; +#ifdef notyet + sk->sk_priority = rt_tos2priority(optval); +#endif + t3_set_tos(so); + } + break; + case SOPT_GET: + optval = inp->inp_ip_tos; + error = sooptcopyout(sopt, &optval, sizeof optval); + break; + default: + error = ENOPROTOOPT; + break; + } + } +#ifdef INET6 + else if (INP_CHECK_SOCKAF(so, AF_INET6)) + error = ip6_ctloutput(so, sopt); +#endif /* INET6 */ + else + error = ip_ctloutput(so, sopt); +} + +static int +chelsio_tcp_ctloutput(struct socket *so, struct sockopt *sopt) +{ + + if (sopt->sopt_level != IPPROTO_TCP) { + INP_UNLOCK(inp); + chelsio_ip_ctloutput(so, sopt); + return (error); + } + if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { + error = ECONNRESET; + goto out; + } + tp = intotcpcb(inp); + if (optname == TCP_NODELAY) { + int oldflags = tp->t_flags; + + if (oldflags & TF_NODELAY) + tp->t_flags &= ~TF_NODELAY; + else + tp->t_flags |= TF_NODELAY; + + if ((oldflags & TF_NODELAY) == 0) + t3_set_nagle(so); + } else + tcp_ctloutput(so, sopt); + + return (0); +} + +/* + * Close a connection by sending a CPL_CLOSE_CON_REQ message. Cannot fail + * under any circumstances. We take the easy way out and always queue the + * message to the write_queue. 
We can optimize the case where the queue is + * already empty though the optimization is probably not worth it. + */ +static void +close_conn(struct socket *so) +{ +#ifdef notyet + struct sk_buff *skb; + struct cpl_close_con_req *req; + unsigned int tid = TID(tcp_sk(sk)); + + skb = alloc_skb_nofail(sizeof(struct cpl_close_con_req)); + req = (struct cpl_close_con_req *)__skb_put(skb, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON)); + req->wr.wr_lo = htonl(V_WR_TID(tid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); + req->rsvd = htonl(tcp_sk(sk)->write_seq); + + tcp_uncork(sk); + skb_entail(sk, skb, TCPCB_FLAG_NO_APPEND); + if (sk->sk_state != TCP_SYN_SENT) + t3_push_frames(sk, 1); +#endif +} + /* - * Placeholder for FreeBSD equivalent to t3_cpl_sock.c + * Drop a TCP connection, reporting + * the specified error. If connection is synchronized, + * then send a RST to peer. + */ +static struct tcpcb * +chelsio_tcp_drop(struct tcpcb *tp, int errno) +{ + struct socket *so = tp->t_inpcb->inp_socket; + + INP_INFO_WLOCK_ASSERT(&tcbinfo); + INP_LOCK_ASSERT(tp->t_inpcb); + + if (TCPS_HAVERCVDSYN(tp->t_state)) { + tp->t_state = TCPS_CLOSED; + close_conn(so); + tcpstat.tcps_drops++; + } else + tcpstat.tcps_conndrops++; + + if (errno == ETIMEDOUT && tp->t_softerror) + errno = tp->t_softerror; + so->so_error = errno; + return (tcp_close(tp)); +} + +/* + * Abort the TCP. Drop the connection abruptly. + */ +static void +chelsio_usr_abort(struct socket *so) +{ + struct inpcb *inp; + struct tcpcb *tp = NULL; + TCPDEBUG0; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL")); + + INP_INFO_WLOCK(&tcbinfo); + INP_LOCK(inp); + KASSERT(inp->inp_socket != NULL, + ("tcp_usr_abort: inp_socket == NULL")); + + /* + * If we still have full TCP state, and we're not dropped, drop. 
+ */ + if (!(inp->inp_vflag & INP_TIMEWAIT) && + !(inp->inp_vflag & INP_DROPPED)) { + tp = intotcpcb(inp); + TCPDEBUG1(); + chelsio_tcp_drop(tp, ECONNABORTED); + TCPDEBUG2(PRU_ABORT); + } + if (!(inp->inp_vflag & INP_DROPPED)) { + SOCK_LOCK(so); + so->so_state |= SS_PROTOREF; + SOCK_UNLOCK(so); + inp->inp_vflag |= INP_SOCKREF; + } + INP_UNLOCK(inp); + INP_INFO_WUNLOCK(&tcbinfo); +} + +static int +chelsio_usr_attach(struct socket *so, int proto, struct thread *td) +{ + /* XXX */ + return (0); +} + +static int +chelsio_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td) +{ + tcp_usr_bind(so, nam, td); + /* + * XXX sync state with card + */ + return (0); +} + +/* + * Initiate (or continue) disconnect. + * If embryonic state, just send reset (once). + * If in ``let data drain'' option and linger null, just drop. + * Otherwise (hard), mark socket disconnecting and drop + * current input data; switch states based on user close, and + * send segment to peer (with FIN). + */ +static void +chelsio_tcp_disconnect(struct tcpcb *tp) +{ + /* XXX */ +} + +/* + * TCP socket is closed. Start friendly disconnect. + */ +static void +chelsio_usr_close(struct socket *so) +{ + struct inpcb *inp; + struct tcpcb *tp = NULL; + TCPDEBUG0; + + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL")); + + INP_INFO_WLOCK(&tcbinfo); + INP_LOCK(inp); + KASSERT(inp->inp_socket != NULL, + ("tcp_usr_close: inp_socket == NULL")); + + /* + * If we still have full TCP state, and we're not dropped, initiate + * a disconnect. 
+ */ + if (!(inp->inp_vflag & INP_TIMEWAIT) && + !(inp->inp_vflag & INP_DROPPED)) { + tp = intotcpcb(inp); + TCPDEBUG1(); + chelsio_tcp_disconnect(tp); + TCPDEBUG2(PRU_CLOSE); + } + if (!(inp->inp_vflag & INP_DROPPED)) { + SOCK_LOCK(so); + so->so_state |= SS_PROTOREF; + SOCK_UNLOCK(so); + inp->inp_vflag |= INP_SOCKREF; + } + INP_UNLOCK(inp); + INP_INFO_WUNLOCK(&tcbinfo); +} + +static int +chelsio_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td) +{ + /* XXX */ + return (0); +} + +/* + * Initiate disconnect from peer. + * If connection never passed embryonic stage, just drop; + * else if don't need to let data drain, then can just drop anyways, + * else have to begin TCP shutdown process: mark socket disconnecting, + * drain unread data, state switch to reflect user close, and + * send segment (e.g. FIN) to peer. Socket will be really disconnected + * when peer sends FIN and acks ours. + * + * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. + */ +static int +chelsio_usr_disconnect(struct socket *so) +{ + struct inpcb *inp; + struct tcpcb *tp = NULL; + int error = 0; + + TCPDEBUG0; + INP_INFO_WLOCK(&tcbinfo); + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL")); + INP_LOCK(inp); + if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { + error = ECONNRESET; + goto out; + } + tp = intotcpcb(inp); + TCPDEBUG1(); + chelsio_tcp_disconnect(tp); +out: + TCPDEBUG2(PRU_DISCONNECT); + INP_UNLOCK(inp); + INP_INFO_WUNLOCK(&tcbinfo); + return (error); +} + +/* + * After a receive, possibly send window update to peer. + */ +static int +chelsio_usr_rcvd(struct socket *so, int flags) +{ + /* XXX */ + + return (0); +} + +static int +chelsio_usr_send(struct socket *so, int flags, struct mbuf *m, + struct sockaddr *nam, struct mbuf *control, struct thread *td) +{ + /* XXX */ + return (0); +} + +/* + * State transitions and actions for close. 
Note that if we are in SYN_SENT + * we remain in that state as we cannot control a connection while it's in + * SYN_SENT; such connections are allowed to establish and are then aborted. + */ +static unsigned char new_state[16] = { + /* current state: new state: action: */ + /* (Invalid) */ TCP_CLOSE, + /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_SYN_SENT */ TCP_SYN_SENT, + /* TCP_SYN_RECV */ TCP_FIN_WAIT1 | TCP_ACTION_FIN, + /* TCP_FIN_WAIT1 */ TCP_FIN_WAIT1, + /* TCP_FIN_WAIT2 */ TCP_FIN_WAIT2, + /* TCP_TIME_WAIT */ TCP_CLOSE, + /* TCP_CLOSE */ TCP_CLOSE, + /* TCP_CLOSE_WAIT */ TCP_LAST_ACK | TCP_ACTION_FIN, + /* TCP_LAST_ACK */ TCP_LAST_ACK, + /* TCP_LISTEN */ TCP_CLOSE, + /* TCP_CLOSING */ TCP_CLOSING, +}; + +/* + * Perform a state transition during close and return the actions indicated + * for the transition. Do not make this function inline, the main reason + * it exists at all is to avoid multiple inlining of tcp_set_state. + */ +static int +close_transition(struct tcpcb *tp) +{ + int next = (int)new_state[tp->t_state]; + + tp->t_state = next & TCP_STATE_MASK; + return (next & TCP_ACTION_FIN); +} + +/* + * Mark the connection as being incapable of further output. 
*/ +static int +chelsio_usr_shutdown(struct socket *so) +{ + int error = 0; + struct inpcb *inp; + struct tcpcb *tp = NULL; + + TCPDEBUG0; + INP_INFO_WLOCK(&tcbinfo); + inp = sotoinpcb(so); + KASSERT(inp != NULL, ("inp == NULL")); + INP_LOCK(inp); + if (inp->inp_vflag & (INP_TIMEWAIT | INP_DROPPED)) { + error = ECONNRESET; + goto out; + } + tp = intotcpcb(inp); + TCPDEBUG1(); + socantsendmore(so); + close_transition(tp); + close_conn(so); +out: + TCPDEBUG2(PRU_SHUTDOWN); + INP_UNLOCK(inp); + INP_INFO_WUNLOCK(&tcbinfo); + + return (error); +} + +struct protosw t3_tcp_protosw; +struct pr_usrreqs t3_tcp_usrreqs = { + .pru_abort = chelsio_usr_abort, + .pru_attach = chelsio_usr_attach, + .pru_bind = chelsio_usr_bind, + .pru_close = chelsio_usr_close, + .pru_connect = chelsio_usr_connect, + .pru_control = in6_control, + .pru_disconnect = chelsio_usr_disconnect, + .pru_rcvd = chelsio_usr_rcvd, + .pru_send = chelsio_usr_send, + .pru_shutdown = chelsio_usr_shutdown, + .pru_sosetlabel = in_pcbsosetlabel, +}; + +static void +t3_init_offload_ops(void) +{ + struct protosw *tcp_protosw; + + /* + * XXX need to handle AF_INET6 case as well + */ + tcp_protosw = pffindtype(AF_INET, SOCK_STREAM); + + t3_tcp_protosw = *tcp_protosw; + + t3_tcp_protosw.pr_ctloutput = chelsio_tcp_ctloutput; + + t3_tcp_usrreqs = tcp_usrreqs; + tcp_usr_bind = tcp_usrreqs.pru_bind; + tcp_usr_attach = tcp_usrreqs.pru_attach; + + t3_tcp_usrreqs.pru_accept = tcp_usrreqs.pru_accept; + t3_tcp_usrreqs.pru_control = tcp_usrreqs.pru_control; + t3_tcp_usrreqs.pru_detach = tcp_usrreqs.pru_detach; + t3_tcp_usrreqs.pru_listen = tcp_usrreqs.pru_listen; + t3_tcp_usrreqs.pru_peeraddr = tcp_usrreqs.pru_peeraddr; + t3_tcp_usrreqs.pru_rcvoob = tcp_usrreqs.pru_rcvoob; + t3_tcp_usrreqs.pru_sockaddr = tcp_usrreqs.pru_sockaddr; + t3_tcp_usrreqs.pru_sosetlabel = tcp_usrreqs.pru_sosetlabel; + + t3_tcp_protosw.pr_usrreqs = &t3_tcp_usrreqs; +} ==== //depot/projects/opentoe/sys/modules/cxgb/t3_tom/Makefile#3 (text+ko) ==== @@ -3,8 
+3,9 @@ .PATH: ${ULP}/t3_tom ${ULP}/toedev KMOD= t3_tom -SRCS= t3_ddp.c t3_tom.c t3_cpl_io.c -SRCS+= t3_listen.c t3_cpl_socket.c -SRCS+= device_if.h bus_if.h pci_if.h +SRCS= t3_cpl_socket.c t3_ddp.c +SRCS+= t3_tom.c t3_cpl_io.c t3_listen.c +SRCS+= device_if.h bus_if.h pci_if.h opt_inet.h +SRCS+= opt_inet6.h opt_tcpdebug.h opt_ddb.h .include <bsd.kmod.mk>