Date: Tue, 17 Apr 2012 20:43:47 +0000 (UTC) From: Navdeep Parhar <np@FreeBSD.org> To: src-committers@freebsd.org, svn-src-user@freebsd.org Subject: svn commit: r234397 - user/np/toe_iwarp/sys/netinet Message-ID: <201204172043.q3HKhlXX098401@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: np Date: Tue Apr 17 20:43:46 2012 New Revision: 234397 URL: http://svn.freebsd.org/changeset/base/234397 Log: Changes to the TCP offload code in the kernel. input: - Deliver stray packets for an offloaded connection to the TOE driver handling the connection. - Inform the TOE driver when received data is consumed from the socket buffer. This allows for forward progress in the rx path. output: - Have one central catch-all in tcp_output instead of various tcp_output_foo routines. This ensures that the kernel never outputs anything for an offloaded connection and always kicks the TOE driver's tx routine instead. timers: - Do not arm any TCP timer for an offloaded connection. syncache: - Update the TOE driver when the entry it sought to add to the syncache is added or removed. The kernel can call on a TOE driver to respond to a SYN at any time for an entry in the syncache so the TOE driver is expected to maintain state for entries that were added but have not been deleted yet. - Do not skip any syncache checks simply because the entry is being added by a hardware TOE. ctloutput: - Inform the TOE driver when setsockopt(2) changes any tcp(4) option of an offloaded socket. 
Modified: user/np/toe_iwarp/sys/netinet/tcp_input.c user/np/toe_iwarp/sys/netinet/tcp_offload.c user/np/toe_iwarp/sys/netinet/tcp_offload.h user/np/toe_iwarp/sys/netinet/tcp_output.c user/np/toe_iwarp/sys/netinet/tcp_subr.c user/np/toe_iwarp/sys/netinet/tcp_syncache.c user/np/toe_iwarp/sys/netinet/tcp_syncache.h user/np/toe_iwarp/sys/netinet/tcp_timer.c user/np/toe_iwarp/sys/netinet/tcp_usrreq.c user/np/toe_iwarp/sys/netinet/tcp_var.h Modified: user/np/toe_iwarp/sys/netinet/tcp_input.c ============================================================================== --- user/np/toe_iwarp/sys/netinet/tcp_input.c Tue Apr 17 20:35:54 2012 (r234396) +++ user/np/toe_iwarp/sys/netinet/tcp_input.c Tue Apr 17 20:43:46 2012 (r234397) @@ -105,6 +105,9 @@ __FBSDID("$FreeBSD$"); #ifdef TCPDEBUG #include <netinet/tcp_debug.h> #endif /* TCPDEBUG */ +#ifdef TCP_OFFLOAD +#include <netinet/tcp_offload.h> +#endif #ifdef IPSEC #include <netipsec/ipsec.h> @@ -938,6 +941,14 @@ relocked: goto dropwithreset; } +#ifdef TCP_OFFLOAD + if (tp->t_flags & TF_TOE) { + tcp_offload_input(tp, m); + m = NULL; /* consumed by the TOE driver */ + goto dropunlock; + } +#endif + /* * We've identified a valid inpcb, but it could be that we need an * inpcbinfo write lock but don't hold it. In this case, attempt to @@ -1299,7 +1310,7 @@ relocked: (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif tcp_dooptions(&to, optp, optlen, TO_SYN); - syncache_add(&inc, &to, th, inp, &so, m); + syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL); /* * Entry added to syncache and mbuf consumed. * Everything already unlocked by syncache_add(). Modified: user/np/toe_iwarp/sys/netinet/tcp_offload.c ============================================================================== --- user/np/toe_iwarp/sys/netinet/tcp_offload.c Tue Apr 17 20:35:54 2012 (r234396) +++ user/np/toe_iwarp/sys/netinet/tcp_offload.c Tue Apr 17 20:43:46 2012 (r234397) @@ -1,145 +1,177 @@ /*- - * Copyright (c) 2007, Chelsio Inc. 
+ * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Neither the name of the Chelsio Corporation nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_inet.h" + #include <sys/param.h> #include <sys/systm.h> #include <sys/types.h> -#include <sys/malloc.h> -#include <sys/kernel.h> -#include <sys/sysctl.h> #include <sys/mbuf.h> #include <sys/socket.h> -#include <sys/socketvar.h> - +#include <sys/sockopt.h> #include <net/if.h> -#include <net/if_types.h> -#include <net/if_var.h> #include <net/route.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/in_pcb.h> #include <netinet/tcp.h> #include <netinet/tcp_var.h> #include <netinet/tcp_offload.h> -#include <netinet/toedev.h> +#define TCPOUTFLAGS +#include <netinet/tcp_fsm.h> +#include <netinet/toecore.h> -uint32_t toedev_registration_count; +int registered_toedevs; +/* + * Provide an opportunity for a TOE driver to offload. + */ int tcp_offload_connect(struct socket *so, struct sockaddr *nam) { struct ifnet *ifp; - struct toedev *tdev; + struct toedev *tod; struct rtentry *rt; - int error; + int error = EOPNOTSUPP; + + INP_WLOCK_ASSERT(sotoinpcb(so)); + KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, + ("%s: called with sa_family %d", __func__, nam->sa_family)); - if (toedev_registration_count == 0) - return (EINVAL); - - /* - * Look up the route used for the connection to - * determine if it uses an interface capable of - * offloading the connection. 
- */ - rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/); - if (rt) + if (registered_toedevs == 0) + return (error); + + rt = rtalloc1(nam, 0, 0); + if (rt) RT_UNLOCK(rt); - else + else return (EHOSTUNREACH); ifp = rt->rt_ifp; - if ((ifp->if_capenable & IFCAP_TOE) == 0) { - error = EINVAL; - goto fail; - } - - tdev = TOEDEV(ifp); - if (tdev == NULL) { - error = EPERM; - goto fail; - } - - if (tdev->tod_can_offload(tdev, so) == 0) { - error = EPERM; - goto fail; - } - - return (tdev->tod_connect(tdev, so, rt, nam)); -fail: + +#ifdef INET + if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) + goto done; +#endif +#ifdef INET6 + if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)) + goto done; +#endif + + tod = TOEDEV(ifp); + if (tod != NULL) + error = tod->tod_connect(tod, so, rt, nam); +done: RTFREE(rt); return (error); } +void +tcp_offload_listen_start(struct tcpcb *tp) +{ -/* - * This file contains code as a short-term staging area before it is moved in - * to sys/netinet/tcp_offload.c - */ + INP_WLOCK_ASSERT(tp->t_inpcb); + + EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp); +} void -tcp_offload_twstart(struct tcpcb *tp) +tcp_offload_listen_stop(struct tcpcb *tp) { - INP_INFO_WLOCK(&V_tcbinfo); - INP_WLOCK(tp->t_inpcb); - tcp_twstart(tp); - INP_INFO_WUNLOCK(&V_tcbinfo); + INP_WLOCK_ASSERT(tp->t_inpcb); + + EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp); } -struct tcpcb * -tcp_offload_close(struct tcpcb *tp) +void +tcp_offload_input(struct tcpcb *tp, struct mbuf *m) { + struct toedev *tod = tp->tod; - INP_INFO_WLOCK(&V_tcbinfo); - INP_WLOCK(tp->t_inpcb); - tp = tcp_close(tp); - INP_INFO_WUNLOCK(&V_tcbinfo); - if (tp) - INP_WUNLOCK(tp->t_inpcb); + KASSERT(tod != NULL, ("%s: tp->tod is NULL", __func__)); + INP_WLOCK_ASSERT(tp->t_inpcb); - return (tp); + tod->tod_input(tod, tp, m); } -struct tcpcb * -tcp_offload_drop(struct tcpcb *tp, int error) +int +tcp_offload_output(struct tcpcb *tp) { + struct toedev *tod = tp->tod; + int error, 
flags; + + KASSERT(tod != NULL, ("%s: tp->tod is NULL", __func__)); + INP_WLOCK_ASSERT(tp->t_inpcb); - INP_INFO_WLOCK(&V_tcbinfo); - INP_WLOCK(tp->t_inpcb); - tp = tcp_drop(tp, error); - INP_INFO_WUNLOCK(&V_tcbinfo); - if (tp) - INP_WUNLOCK(tp->t_inpcb); + flags = tcp_outflags[tp->t_state]; - return (tp); + if (flags & TH_RST) { + /* XXX: avoid repeated calls like we do for FIN */ + error = tod->tod_send_rst(tod, tp); + } else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) && + (tp->t_flags & TF_SENTFIN) == 0) { + error = tod->tod_send_fin(tod, tp); + if (error == 0) + tp->t_flags |= TF_SENTFIN; + } else + error = tod->tod_output(tod, tp); + + return (error); +} + +void +tcp_offload_rcvd(struct tcpcb *tp) +{ + struct toedev *tod = tp->tod; + + KASSERT(tod != NULL, ("%s: tp->tod is NULL", __func__)); + INP_WLOCK_ASSERT(tp->t_inpcb); + + tod->tod_rcvd(tod, tp); } +void +tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name) +{ + struct toedev *tod = tp->tod; + + KASSERT(tod != NULL, ("%s: tp->tod is NULL", __func__)); + INP_WLOCK_ASSERT(tp->t_inpcb); + + tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name); +} + +void +tcp_offload_detach(struct tcpcb *tp) +{ + struct toedev *tod = tp->tod; + + KASSERT(tod != NULL, ("%s: tp->tod is NULL", __func__)); + INP_WLOCK_ASSERT(tp->t_inpcb); + + tod->tod_pcb_detach(tod, tp); +} Modified: user/np/toe_iwarp/sys/netinet/tcp_offload.h ============================================================================== --- user/np/toe_iwarp/sys/netinet/tcp_offload.h Tue Apr 17 20:35:54 2012 (r234396) +++ user/np/toe_iwarp/sys/netinet/tcp_offload.h Tue Apr 17 20:43:46 2012 (r234397) @@ -1,354 +1,48 @@ /*- - * Copyright (c) 2007, Chelsio Inc. + * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Neither the name of the Chelsio Corporation nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * $FreeBSD$ + * */ #ifndef _NETINET_TCP_OFFLOAD_H_ -#define _NETINET_TCP_OFFLOAD_H_ +#define _NETINET_TCP_OFFLOAD_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif -/* - * A driver publishes that it provides offload services - * by setting IFCAP_TOE in the ifnet. The offload connect - * will bypass any further work if the interface that a - * connection would use does not support TCP offload. - * - * The TOE API assumes that the tcp offload engine can offload the - * the entire connection from set up to teardown, with some provision - * being made to allowing the software stack to handle time wait. If - * the device does not meet these criteria, it is the driver's responsibility - * to overload the functions that it needs to in tcp_usrreqs and make - * its own calls to tcp_output if it needs to do so. - * - * There is currently no provision for the device advertising the congestion - * control algorithms it supports as there is currently no API for querying - * an operating system for the protocols that it has loaded. This is a desirable - * future extension. - * - * - * - * It is assumed that individuals deploying TOE will want connections - * to be offloaded without software changes so all connections on an - * interface providing TOE are offloaded unless the SO_NO_OFFLOAD - * flag is set on the socket. 
- * - * - * The toe_usrreqs structure constitutes the TOE driver's - * interface to the TCP stack for functionality that doesn't - * interact directly with userspace. If one wants to provide - * (optional) functionality to do zero-copy to/from - * userspace one still needs to override soreceive/sosend - * with functions that fault in and pin the user buffers. - * - * + tu_send - * - tells the driver that new data may have been added to the - * socket's send buffer - the driver should not fail if the - * buffer is in fact unchanged - * - the driver is responsible for providing credits (bytes in the send window) - * back to the socket by calling sbdrop() as segments are acknowledged. - * - The driver expects the inpcb lock to be held - the driver is expected - * not to drop the lock. Hence the driver is not allowed to acquire the - * pcbinfo lock during this call. - * - * + tu_rcvd - * - returns credits to the driver and triggers window updates - * to the peer (a credit as used here is a byte in the peer's receive window) - * - the driver is expected to determine how many bytes have been - * consumed and credit that back to the card so that it can grow - * the window again by maintaining its own state between invocations. - * - In principle this could be used to shrink the window as well as - * grow the window, although it is not used for that now. - * - this function needs to correctly handle being called any number of - * times without any bytes being consumed from the receive buffer. - * - The driver expects the inpcb lock to be held - the driver is expected - * not to drop the lock. Hence the driver is not allowed to acquire the - * pcbinfo lock during this call. 
- * - * + tu_disconnect - * - tells the driver to send FIN to peer - * - driver is expected to send the remaining data and then do a clean half close - * - disconnect implies at least half-close so only send, reset, and detach - * are legal - * - the driver is expected to handle transition through the shutdown - * state machine and allow the stack to support SO_LINGER. - * - The driver expects the inpcb lock to be held - the driver is expected - * not to drop the lock. Hence the driver is not allowed to acquire the - * pcbinfo lock during this call. - * - * + tu_reset - * - closes the connection and sends a RST to peer - * - driver is expectd to trigger an RST and detach the toepcb - * - no further calls are legal after reset - * - The driver expects the inpcb lock to be held - the driver is expected - * not to drop the lock. Hence the driver is not allowed to acquire the - * pcbinfo lock during this call. - * - * The following fields in the tcpcb are expected to be referenced by the driver: - * + iss - * + rcv_nxt - * + rcv_wnd - * + snd_isn - * + snd_max - * + snd_nxt - * + snd_una - * + t_flags - * + t_inpcb - * + t_maxseg - * + t_toe - * - * The following fields in the inpcb are expected to be referenced by the driver: - * + inp_lport - * + inp_fport - * + inp_laddr - * + inp_fport - * + inp_socket - * + inp_ip_tos - * - * The following fields in the socket are expected to be referenced by the - * driver: - * + so_comp - * + so_error - * + so_linger - * + so_options - * + so_rcv - * + so_snd - * + so_state - * + so_timeo - * - * These functions all return 0 on success and can return the following errors - * as appropriate: - * + EPERM: - * + ENOBUFS: memory allocation failed - * + EMSGSIZE: MTU changed during the call - * + EHOSTDOWN: - * + EHOSTUNREACH: - * + ENETDOWN: - * * ENETUNREACH: the peer is no longer reachable - * - * + tu_detach - * - tells driver that the socket is going away so disconnect - * the toepcb and free appropriate resources - * - allows 
the driver to cleanly handle the case of connection state - * outliving the socket - * - no further calls are legal after detach - * - the driver is expected to provide its own synchronization between - * detach and receiving new data. - * - * + tu_syncache_event - * - even if it is not actually needed, the driver is expected to - * call syncache_add for the initial SYN and then syncache_expand - * for the SYN,ACK - * - tells driver that a connection either has not been added or has - * been dropped from the syncache - * - the driver is expected to maintain state that lives outside the - * software stack so the syncache needs to be able to notify the - * toe driver that the software stack is not going to create a connection - * for a received SYN - * - The driver is responsible for any synchronization required between - * the syncache dropping an entry and the driver processing the SYN,ACK. - * - */ -struct toe_usrreqs { - int (*tu_send)(struct tcpcb *tp); - int (*tu_rcvd)(struct tcpcb *tp); - int (*tu_disconnect)(struct tcpcb *tp); - int (*tu_reset)(struct tcpcb *tp); - void (*tu_detach)(struct tcpcb *tp); - void (*tu_syncache_event)(int event, void *toep); -}; - -/* - * Proxy for struct tcpopt between TOE drivers and TCP functions. - */ -struct toeopt { - u_int64_t to_flags; /* see tcpopt in tcp_var.h */ - u_int16_t to_mss; /* maximum segment size */ - u_int8_t to_wscale; /* window scaling */ - - u_int8_t _pad1; /* explicit pad for 64bit alignment */ - u_int32_t _pad2; /* explicit pad for 64bit alignment */ - u_int64_t _pad3[4]; /* TBD */ -}; - -#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */ -#define TOE_SC_DROP 2 /* connection was timed out */ - -/* - * Because listen is a one-to-many relationship (a socket can be listening - * on all interfaces on a machine some of which may be using different TCP - * offload devices), listen uses a publish/subscribe mechanism. The TCP - * offload driver registers a listen notification function with the stack. 
- * When a listen socket is created all TCP offload devices are notified - * so that they can do the appropriate set up to offload connections on the - * port to which the socket is bound. When the listen socket is closed, - * the offload devices are notified so that they will stop listening on that - * port and free any associated resources as well as sending RSTs on any - * connections in the SYN_RCVD state. - * - */ - -typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *); -typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *); - -EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn); -EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn); - -/* - * Check if the socket can be offloaded by the following steps: - * - determine the egress interface - * - check the interface for TOE capability and TOE is enabled - * - check if the device has resources to offload the connection - */ -int tcp_offload_connect(struct socket *so, struct sockaddr *nam); - -/* - * The tcp_output_* routines are wrappers around the toe_usrreqs calls - * which trigger packet transmission. In the non-offloaded case they - * translate to tcp_output. The tcp_offload_* routines notify TOE - * of specific events. I the non-offloaded case they are no-ops. - * - * Listen is a special case because it is a 1 to many relationship - * and there can be more than one offload driver in the system. 
- */ - -/* - * Connection is offloaded - */ -#define tp_offload(tp) ((tp)->t_flags & TF_TOE) - -/* - * hackish way of allowing this file to also be included by TOE - * which needs to be kept ignorant of socket implementation details - */ -#ifdef _SYS_SOCKETVAR_H_ -/* - * The socket has not been marked as "do not offload" - */ -#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0) - -static __inline int -tcp_output_connect(struct socket *so, struct sockaddr *nam) -{ - struct tcpcb *tp = sototcpcb(so); - int error; - - /* - * If offload has been disabled for this socket or the - * connection cannot be offloaded just call tcp_output - * to start the TCP state machine. - */ -#ifndef TCP_OFFLOAD_DISABLE - if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0) -#endif - error = tcp_output(tp); - return (error); -} - -static __inline int -tcp_output_send(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - return (tp->t_tu->tu_send(tp)); -#endif - return (tcp_output(tp)); -} - -static __inline int -tcp_output_rcvd(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - return (tp->t_tu->tu_rcvd(tp)); -#endif - return (tcp_output(tp)); -} +extern int registered_toedevs; -static __inline int -tcp_output_disconnect(struct tcpcb *tp) -{ +int tcp_offload_connect(struct socket *, struct sockaddr *); +void tcp_offload_listen_start(struct tcpcb *); +void tcp_offload_listen_stop(struct tcpcb *); +void tcp_offload_input(struct tcpcb *, struct mbuf *); +int tcp_offload_output(struct tcpcb *); +void tcp_offload_rcvd(struct tcpcb *); +void tcp_offload_ctloutput(struct tcpcb *, int, int); +void tcp_offload_detach(struct tcpcb *); -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - return (tp->t_tu->tu_disconnect(tp)); #endif - return (tcp_output(tp)); -} - -static __inline int -tcp_output_reset(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - return (tp->t_tu->tu_reset(tp)); -#endif - 
return (tcp_output(tp)); -} - -static __inline void -tcp_offload_detach(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - tp->t_tu->tu_detach(tp); -#endif -} - -static __inline void -tcp_offload_listen_open(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket)) - EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp); -#endif -} - -static __inline void -tcp_offload_listen_close(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp); -#endif -} -#undef SO_OFFLOADABLE -#endif /* _SYS_SOCKETVAR_H_ */ -#undef tp_offload - -void tcp_offload_twstart(struct tcpcb *tp); -struct tcpcb *tcp_offload_close(struct tcpcb *tp); -struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error); - -#endif /* _NETINET_TCP_OFFLOAD_H_ */ Modified: user/np/toe_iwarp/sys/netinet/tcp_output.c ============================================================================== --- user/np/toe_iwarp/sys/netinet/tcp_output.c Tue Apr 17 20:35:54 2012 (r234396) +++ user/np/toe_iwarp/sys/netinet/tcp_output.c Tue Apr 17 20:43:46 2012 (r234397) @@ -75,6 +75,9 @@ __FBSDID("$FreeBSD$"); #ifdef TCPDEBUG #include <netinet/tcp_debug.h> #endif +#ifdef TCP_OFFLOAD +#include <netinet/tcp_offload.h> +#endif #ifdef IPSEC #include <netipsec/ipsec.h> @@ -191,6 +194,11 @@ tcp_output(struct tcpcb *tp) INP_WLOCK_ASSERT(tp->t_inpcb); +#ifdef TCP_OFFLOAD + if (tp->t_flags & TF_TOE) + tcp_offload_output(tp); +#endif + /* * Determine length of data that should be transmitted, * and flags that will be used. 
Modified: user/np/toe_iwarp/sys/netinet/tcp_subr.c ============================================================================== --- user/np/toe_iwarp/sys/netinet/tcp_subr.c Tue Apr 17 20:35:54 2012 (r234396) +++ user/np/toe_iwarp/sys/netinet/tcp_subr.c Tue Apr 17 20:43:46 2012 (r234397) @@ -85,7 +85,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #include <netinet/tcp_syncache.h> -#include <netinet/tcp_offload.h> #ifdef INET6 #include <netinet6/tcp6_var.h> #endif @@ -96,6 +95,9 @@ __FBSDID("$FreeBSD$"); #ifdef INET6 #include <netinet6/ip6protosw.h> #endif +#ifdef TCP_OFFLOAD +#include <netinet/tcp_offload.h> +#endif #ifdef IPSEC #include <netipsec/ipsec.h> @@ -824,7 +826,7 @@ tcp_drop(struct tcpcb *tp, int errno) if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_state = TCPS_CLOSED; - (void) tcp_output_reset(tp); + (void) tcp_output(tp); TCPSTAT_INC(tcps_drops); } else TCPSTAT_INC(tcps_conndrops); @@ -924,8 +926,12 @@ tcp_discardcb(struct tcpcb *tp) /* free the reassembly queue, if any */ tcp_reass_flush(tp); + +#ifdef TCP_OFFLOAD /* Disconnect offload device, if any. 
*/ - tcp_offload_detach(tp); + if (tp->t_flags & TF_TOE) + tcp_offload_detach(tp); +#endif tcp_free_sackholes(tp); @@ -954,9 +960,10 @@ tcp_close(struct tcpcb *tp) INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); - /* Notify any offload devices of listener close */ +#ifdef TCP_OFFLOAD if (tp->t_state == TCPS_LISTEN) - tcp_offload_listen_close(tp); + tcp_offload_listen_stop(tp); +#endif in_pcbdrop(inp); TCPSTAT_INC(tcps_closed); KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL")); @@ -1687,7 +1694,7 @@ tcp_mtudisc(struct inpcb *inp, int errno tp->snd_recover = tp->snd_max; if (tp->t_flags & TF_SACK_PERMIT) EXIT_FASTRECOVERY(tp->t_flags); - tcp_output_send(tp); + tcp_output(tp); return (inp); } Modified: user/np/toe_iwarp/sys/netinet/tcp_syncache.c ============================================================================== --- user/np/toe_iwarp/sys/netinet/tcp_syncache.c Tue Apr 17 20:35:54 2012 (r234396) +++ user/np/toe_iwarp/sys/netinet/tcp_syncache.c Tue Apr 17 20:43:46 2012 (r234397) @@ -81,10 +81,12 @@ __FBSDID("$FreeBSD$"); #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #include <netinet/tcp_syncache.h> -#include <netinet/tcp_offload.h> #ifdef INET6 #include <netinet6/tcp6_var.h> #endif +#ifdef TCP_OFFLOAD +#include <netinet/toecore.h> +#endif #ifdef IPSEC #include <netipsec/ipsec.h> @@ -110,10 +112,8 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, &VNET_NAME(tcp_syncookiesonly), 0, "Use only TCP SYN cookies"); -#ifdef TCP_OFFLOAD_DISABLE -#define TOEPCB_ISSET(sc) (0) -#else -#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL) +#ifdef TCP_OFFLOAD +#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL) #endif static void syncache_drop(struct syncache *, struct syncache_head *); @@ -332,6 +332,14 @@ syncache_insert(struct syncache *sc, str TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash); sch->sch_length++; +#ifdef TCP_OFFLOAD + if (ADDED_BY_TOE(sc)) { + struct toedev *tod = sc->sc_tod; + + tod->tod_syncache_added(tod, sc->sc_todctx); + 
} +#endif + /* Reinitialize the bucket row's timer. */ if (sch->sch_length == 1) sch->sch_nextc = ticks + INT_MAX; @@ -356,10 +364,14 @@ syncache_drop(struct syncache *sc, struc TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); sch->sch_length--; -#ifndef TCP_OFFLOAD_DISABLE - if (sc->sc_tu) - sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb); -#endif +#ifdef TCP_OFFLOAD + if (ADDED_BY_TOE(sc)) { + struct toedev *tod = sc->sc_tod; + + tod->tod_syncache_removed(tod, sc->sc_todctx); + } +#endif + syncache_free(sc); V_tcp_syncache.cache_count--; } @@ -926,6 +938,13 @@ syncache_expand(struct in_conninfo *inc, /* Pull out the entry to unlock the bucket row. */ TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); sch->sch_length--; +#ifdef TCP_OFFLOAD + if (ADDED_BY_TOE(sc)) { + struct toedev *tod = sc->sc_tod; + + tod->tod_syncache_removed(tod, sc->sc_todctx); + } +#endif V_tcp_syncache.cache_count--; SCH_UNLOCK(sch); } @@ -934,7 +953,7 @@ syncache_expand(struct in_conninfo *inc, * Segment validation: * ACK must match our initial sequence number + 1 (the SYN|ACK). */ - if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) { + if (th->th_ack != sc->sc_iss + 1) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment " "rejected\n", s, __func__, th->th_ack, sc->sc_iss); @@ -945,9 +964,8 @@ syncache_expand(struct in_conninfo *inc, * The SEQ must fall in the window starting at the received * initial receive sequence number + 1 (the SYN). 
*/ - if ((SEQ_LEQ(th->th_seq, sc->sc_irs) || - SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) && - !TOEPCB_ISSET(sc)) { + if (SEQ_LEQ(th->th_seq, sc->sc_irs) || + SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment " "rejected\n", s, __func__, th->th_seq, sc->sc_irs); @@ -964,8 +982,7 @@ syncache_expand(struct in_conninfo *inc, * If timestamps were negotiated the reflected timestamp * must be equal to what we actually sent in the SYN|ACK. */ - if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts && - !TOEPCB_ISSET(sc)) { + if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, " "segment rejected\n", @@ -993,25 +1010,6 @@ failed: return (0); } -int -tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo, - struct tcphdr *th, struct socket **lsop, struct mbuf *m) -{ - struct tcpopt to; - int rc; - - bzero(&to, sizeof(struct tcpopt)); - to.to_mss = toeo->to_mss; - to.to_wscale = toeo->to_wscale; - to.to_flags = toeo->to_flags; - - INP_INFO_WLOCK(&V_tcbinfo); - rc = syncache_expand(inc, &to, th, lsop, m); - INP_INFO_WUNLOCK(&V_tcbinfo); - - return (rc); -} - /* * Given a LISTEN socket and an inbound SYN request, add * this to the syn cache, and send back a segment: @@ -1025,10 +1023,10 @@ tcp_offload_syncache_expand(struct in_co * consume all available buffer space if it were ACKed. By not ACKing * the data, we avoid this DoS scenario. 
*/ -static void -_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, - struct inpcb *inp, struct socket **lsop, struct mbuf *m, - struct toe_usrreqs *tu, void *toepcb) +void +syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, + struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod, + void *todctx) { struct tcpcb *tp; struct socket *so; @@ -1114,11 +1112,6 @@ _syncache_add(struct in_conninfo *inc, s sc = syncache_lookup(inc, &sch); /* returns locked entry */ SCH_LOCK_ASSERT(sch); if (sc != NULL) { -#ifndef TCP_OFFLOAD_DISABLE - if (sc->sc_tu) - sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT, - sc->sc_toepcb); -#endif TCPSTAT_INC(tcps_sc_dupsyn); if (ipopts) { /* @@ -1151,7 +1144,7 @@ _syncache_add(struct in_conninfo *inc, s s, __func__); free(s, M_TCPLOG); } - if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) { + if (syncache_respond(sc) == 0) { sc->sc_rxmits = 0; syncache_timeout(sc, sch, 1); TCPSTAT_INC(tcps_sndacks); @@ -1202,9 +1195,9 @@ _syncache_add(struct in_conninfo *inc, s sc->sc_ip_tos = ip_tos; sc->sc_ip_ttl = ip_ttl; } -#ifndef TCP_OFFLOAD_DISABLE - sc->sc_tu = tu; - sc->sc_toepcb = toepcb; +#ifdef TCP_OFFLOAD + sc->sc_tod = tod; + sc->sc_todctx = todctx; #endif sc->sc_irs = th->th_seq; sc->sc_iss = arc4random(); @@ -1299,7 +1292,7 @@ _syncache_add(struct in_conninfo *inc, s /* * Do a standard 3-way handshake. 
*/ - if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) { + if (syncache_respond(sc) == 0) { if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs) syncache_free(sc); else if (sc != &scs) @@ -1491,37 +1484,21 @@ syncache_respond(struct syncache *sc) htons(tlen + optlen - hlen + IPPROTO_TCP)); m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); +#ifdef TCP_OFFLOAD + if (ADDED_BY_TOE(sc)) { + struct toedev *tod = sc->sc_tod; + + error = tod->tod_syncache_respond(tod, sc->sc_todctx, m); + + return (error); + } +#endif error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL); } #endif return (error); } -void -syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, - struct inpcb *inp, struct socket **lsop, struct mbuf *m) -{ - _syncache_add(inc, to, th, inp, lsop, m, NULL, NULL); -} - *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204172043.q3HKhlXX098401>