Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 24 Dec 2015 19:09:49 +0000 (UTC)
From:      Patrick Kelsey <pkelsey@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r292706 - in head/sys: conf netinet
Message-ID:  <201512241909.tBOJ9nlX070850@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: pkelsey
Date: Thu Dec 24 19:09:48 2015
New Revision: 292706
URL: https://svnweb.freebsd.org/changeset/base/292706

Log:
  Implementation of server-side TCP Fast Open (TFO) [RFC7413].
  
  TFO is disabled by default in the kernel build.  See the top comment
  in sys/netinet/tcp_fastopen.c for implementation particulars.
  
  Reviewed by:	gnn, jch, stas
  MFC after:	3 days
  Sponsored by:	Verisign, Inc.
  Differential Revision:	https://reviews.freebsd.org/D4350

Added:
  head/sys/netinet/tcp_fastopen.c   (contents, props changed)
  head/sys/netinet/tcp_fastopen.h   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/conf/options
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_syncache.c
  head/sys/netinet/tcp_syncache.h
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Thu Dec 24 18:53:17 2015	(r292705)
+++ head/sys/conf/files	Thu Dec 24 19:09:48 2015	(r292706)
@@ -3688,6 +3688,7 @@ netinet/sctp_usrreq.c		optional inet sct
 netinet/sctputil.c		optional inet sctp | inet6 sctp
 netinet/siftr.c			optional inet siftr alq | inet6 siftr alq
 netinet/tcp_debug.c		optional tcpdebug
+netinet/tcp_fastopen.c		optional inet tcp_rfc7413 | inet6 tcp_rfc7413
 netinet/tcp_hostcache.c		optional inet | inet6
 netinet/tcp_input.c		optional inet | inet6
 netinet/tcp_lro.c		optional inet | inet6

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options	Thu Dec 24 18:53:17 2015	(r292705)
+++ head/sys/conf/options	Thu Dec 24 19:09:48 2015	(r292706)
@@ -440,6 +440,8 @@ TCPDEBUG
 TCPPCAP		opt_global.h
 SIFTR
 TCP_OFFLOAD		opt_inet.h # Enable code to dispatch TCP offloading
+TCP_RFC7413		opt_inet.h
+TCP_RFC7413_MAX_KEYS	opt_inet.h
 TCP_SIGNATURE		opt_inet.h
 VLAN_ARRAY		opt_vlan.h
 XBONEHACK

Modified: head/sys/netinet/tcp.h
==============================================================================
--- head/sys/netinet/tcp.h	Thu Dec 24 18:53:17 2015	(r292705)
+++ head/sys/netinet/tcp.h	Thu Dec 24 19:09:48 2015	(r292706)
@@ -97,6 +97,10 @@ struct tcphdr {
 #define    TCPOLEN_TSTAMP_APPA		(TCPOLEN_TIMESTAMP+2) /* appendix A */
 #define	TCPOPT_SIGNATURE	19		/* Keyed MD5: RFC 2385 */
 #define	   TCPOLEN_SIGNATURE		18
+#define	TCPOPT_FAST_OPEN	34
+#define	   TCPOLEN_FAST_OPEN_EMPTY	2
+#define	   TCPOLEN_FAST_OPEN_MIN	6
+#define	   TCPOLEN_FAST_OPEN_MAX	18
 
 /* Miscellaneous constants */
 #define	MAX_SACK_BLKS	6	/* Max # SACK blocks stored at receiver side */
@@ -165,6 +169,7 @@ struct tcphdr {
 #define	TCP_KEEPIDLE	256	/* L,N,X start keeplives after this period */
 #define	TCP_KEEPINTVL	512	/* L,N interval between keepalives */
 #define	TCP_KEEPCNT	1024	/* L,N number of keepalives before close */
+#define	TCP_FASTOPEN	1025	/* enable TFO / was created via TFO */
 #define	TCP_PCAP_OUT	2048	/* number of output packets to keep */
 #define	TCP_PCAP_IN	4096	/* number of input packets to keep */
 #define TCP_FUNCTION_BLK 8192	/* Set the tcp function pointers to the specified stack */

Added: head/sys/netinet/tcp_fastopen.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/netinet/tcp_fastopen.c	Thu Dec 24 19:09:48 2015	(r292706)
@@ -0,0 +1,442 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413].
+ *
+ * This implementation is currently considered to be experimental and is not
+ * included in kernel builds by default.  To include this code, add the
+ * following line to your kernel config:
+ *
+ * options TCP_RFC7413
+ *
+ * The generated TFO cookies are the 64-bit output of
+ * SipHash24(<16-byte-key><client-ip>).  Multiple concurrent valid keys are
+ * supported so that time-based rolling cookie invalidation policies can be
+ * implemented in the system.  The default number of concurrent keys is 2.
+ * This can be adjusted in the kernel config as follows:
+ *
+ * options TCP_RFC7413_MAX_KEYS=<num-keys>
+ *
+ *
+ * The following TFO-specific sysctls are defined:
+ *
+ * net.inet.tcp.fastopen.acceptany (RW, default 0)
+ *     When non-zero, all client-supplied TFO cookies will be considered to
+ *     be valid.
+ *
+ * net.inet.tcp.fastopen.autokey (RW, default 120)
+ *     When this and net.inet.tcp.fastopen.enabled are non-zero, a new key
+ *     will be automatically generated after this many seconds.
+ *
+ * net.inet.tcp.fastopen.enabled (RW, default 0)
+ *     When zero, no new TFO connections can be created.  On the transition
+ *     from enabled to disabled, all installed keys are removed.  On the 
+ *     transition from disabled to enabled, if net.inet.tcp.fastopen.autokey
+ *     is non-zero and there are no keys installed, a new key will be 
+ *     generated immediately.  The transition from enabled to disabled does
+ *     not affect any TFO connections in progress; it only prevents new ones
+ *     from being made.
+ *
+ * net.inet.tcp.fastopen.keylen (RO)
+ *     The key length in bytes.
+ *
+ * net.inet.tcp.fastopen.maxkeys (RO)
+ *     The maximum number of keys supported.
+ *
+ * net.inet.tcp.fastopen.numkeys (RO)
+ *     The current number of keys installed.
+ *
+ * net.inet.tcp.fastopen.setkey (WO)
+ *     Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this
+ *     sysctl.
+ *
+ *
+ * In order for TFO connections to be created via a listen socket, that
+ * socket must have the TCP_FASTOPEN socket option set on it.  This option
+ * can be set on the socket either before or after the listen() is invoked.
+ * Clearing this option on a listen socket after it has been set has no
+ * effect on existing TFO connections or TFO connections in progress; it
+ * only prevents new TFO connections from being made.
+ *
+ * For passively-created sockets, the TCP_FASTOPEN socket option can be
+ * queried to determine whether the connection was established using TFO.
+ * Note that connections that are established via a TFO SYN, but that fall
+ * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option
+ * set.
+ *
+ * Per the RFC, this implementation limits the number of TFO connections
+ * that can be in the SYN_RECEIVED state on a per listen-socket basis.
+ * Whenever this limit is exceeded, requests for new TFO connections are
+ * serviced as non-TFO requests.  Without such a limit, given a valid TFO
+ * cookie, an attacker could keep the listen queue in an overflow condition
+ * using a TFO SYN flood.  This implementation sets the limit at half the
+ * configured listen backlog.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <crypto/siphash/siphash.h>
+
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/tcp_fastopen.h>
+#include <netinet/tcp_var.h>
+
+
+#define	TCP_FASTOPEN_KEY_LEN	SIPHASH_KEY_LENGTH
+
+#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
+#define	TCP_FASTOPEN_MAX_KEYS	2
+#else
+#define	TCP_FASTOPEN_MAX_KEYS	TCP_RFC7413_MAX_KEYS
+#endif
+
+/*
+ * Ring of installed cookie keys.  'newest' is the index of the most
+ * recently installed key; new keys are stored at increasing indices with
+ * wrap-around, so progressively older keys are found at decreasing indices
+ * (again with wrap-around).
+ */
+struct tcp_fastopen_keylist {
+	unsigned int newest;
+	uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
+};
+
+/*
+ * Context handed to the auto-key callout: the callout itself plus the vnet
+ * that the handler passes to CURVNET_SET() before touching per-vnet state.
+ */
+struct tcp_fastopen_callout {
+	struct callout c;
+	struct vnet *v;
+};
+
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
+
+static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0;
+#define	V_tcp_fastopen_acceptany	VNET(tcp_fastopen_acceptany)
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany,
+    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0,
+    "Accept any non-empty cookie");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120;
+#define	V_tcp_fastopen_autokey	VNET(tcp_fastopen_autokey)
+static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
+    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_autokey, "IU",
+    "Number of seconds between auto-generation of a new key; zero disables");
+
+VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0;
+static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled,
+    CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_enabled, "IU",
+    "Enable/disable TCP Fast Open processing");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
+    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
+    "Key length in bytes");
+
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
+    CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
+    "Maximum number of keys supported");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0;
+#define	V_tcp_fastopen_numkeys	VNET(tcp_fastopen_numkeys)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
+    CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
+    "Number of keys installed");
+
+static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
+    CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
+    &sysctl_net_inet_tcp_fastopen_setkey, "",
+    "Install a new key");
+
+static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock);
+#define	V_tcp_fastopen_keylock	VNET(tcp_fastopen_keylock)
+
+#define TCP_FASTOPEN_KEYS_RLOCK(t)	rm_rlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_RUNLOCK(t)	rm_runlock(&V_tcp_fastopen_keylock, (t))
+#define TCP_FASTOPEN_KEYS_WLOCK()	rm_wlock(&V_tcp_fastopen_keylock)
+#define TCP_FASTOPEN_KEYS_WUNLOCK()	rm_wunlock(&V_tcp_fastopen_keylock)
+
+static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys);
+#define V_tcp_fastopen_keys	VNET(tcp_fastopen_keys)
+
+static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx);
+#define V_tcp_fastopen_autokey_ctx	VNET(tcp_fastopen_autokey_ctx)
+
+static VNET_DEFINE(uma_zone_t, counter_zone);
+#define	V_counter_zone			VNET(counter_zone)
+
+/*
+ * Initialize the per-vnet TFO state: create the UMA zone that backs the
+ * pending-connection counters, initialize the key lock, associate the
+ * auto-key callout with that lock, record the owning vnet for the callout
+ * handler, and position the key ring index for the first key install.
+ */
+void
+tcp_fastopen_init(void)
+{
+	V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+	rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
+	callout_init_rm(&V_tcp_fastopen_autokey_ctx.c,
+	    &V_tcp_fastopen_keylock, 0);
+	/*
+	 * Record the owning vnet so that tcp_fastopen_autokey_callout() has
+	 * a valid context to hand to CURVNET_SET().
+	 */
+	V_tcp_fastopen_autokey_ctx.v = curvnet;
+	/*
+	 * Start 'newest' at the last slot so that the first key installed
+	 * wraps around and lands in slot 0.
+	 */
+	V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+}
+
+/*
+ * Tear down what tcp_fastopen_init() set up.  The auto-key callout is
+ * drained first, before the lock it was initialized with is destroyed;
+ * the counter zone is destroyed last.
+ */
+void
+tcp_fastopen_destroy(void)
+{
+	callout_drain(&V_tcp_fastopen_autokey_ctx.c);
+	rm_destroy(&V_tcp_fastopen_keylock);
+	uma_zdestroy(V_counter_zone);
+}
+
+/*
+ * Allocate a pending-connection counter from the TFO counter zone using
+ * M_NOWAIT.  A freshly allocated counter starts out at 1.  Returns NULL
+ * if the allocation fails.
+ */
+unsigned int *
+tcp_fastopen_alloc_counter(void)
+{
+	unsigned int *cnt;
+
+	cnt = uma_zalloc(V_counter_zone, M_NOWAIT);
+	if (cnt == NULL)
+		return (NULL);
+	*cnt = 1;
+	return (cnt);
+}
+
+/*
+ * Release one count on a counter obtained from
+ * tcp_fastopen_alloc_counter().  When the value is back at its initial 1
+ * the counter is returned to the zone; otherwise it is atomically
+ * decremented.
+ *
+ * NOTE(review): the test of *counter and the free/decrement that follows
+ * are not one atomic step -- presumably callers serialize the final
+ * release; confirm against the call sites.
+ */
+void
+tcp_fastopen_decrement_counter(unsigned int *counter)
+{
+	if (*counter == 1)
+		uma_zfree(V_counter_zone, counter);
+	else
+		atomic_subtract_int(counter, 1);
+}
+
+/*
+ * Install 'key' (TCP_FASTOPEN_KEY_LEN bytes) as the newest key.  Once the
+ * ring is full this overwrites the oldest slot.  As the name indicates,
+ * the caller is expected to hold the key lock for writing.
+ */
+static void
+tcp_fastopen_addkey_locked(uint8_t *key)
+{
+	unsigned int slot;
+
+	/* Advance to the next slot, wrapping at the end of the ring. */
+	slot = V_tcp_fastopen_keys.newest + 1;
+	if (slot == TCP_FASTOPEN_MAX_KEYS)
+		slot = 0;
+	V_tcp_fastopen_keys.newest = slot;
+	memcpy(V_tcp_fastopen_keys.key[slot], key, TCP_FASTOPEN_KEY_LEN);
+
+	/* The installed-key count saturates at the ring size. */
+	if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS)
+		V_tcp_fastopen_numkeys++;
+}
+
+/*
+ * Generate a random key with arc4rand() and install it as the newest key.
+ * The _locked suffix and the callers in this file indicate that the key
+ * lock is expected to be held for writing.
+ */
+static void
+tcp_fastopen_autokey_locked(void)
+{
+	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
+
+	arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0);
+	tcp_fastopen_addkey_locked(newkey);
+}
+
+/*
+ * Callout handler for automatic key rotation.  Switches to the vnet stored
+ * in the context (ctx->v), installs a fresh random key, and re-arms itself
+ * to fire again after V_tcp_fastopen_autokey seconds.
+ *
+ * 'arg' is the struct tcp_fastopen_callout the callout was armed with.
+ */
+static void
+tcp_fastopen_autokey_callout(void *arg)
+{
+	struct tcp_fastopen_callout *ctx = arg;
+
+	CURVNET_SET(ctx->v);
+	tcp_fastopen_autokey_locked();
+	/* The interval is kept in seconds; multiply by hz to get ticks. */
+	callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz,
+		      tcp_fastopen_autokey_callout, ctx);
+	CURVNET_RESTORE();
+}
+
+
+/*
+ * Compute the 64-bit TFO cookie for a connection: SipHash24 keyed with
+ * 'key' over the foreign address held in 'inc' (the IPv4 or the IPv6
+ * address, selected by the INC_ISIPV6 flag).
+ *
+ * Returns the hash output as a uint64_t.
+ */
+static uint64_t
+tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc)
+{
+	SIPHASH_CTX ctx;
+	uint64_t siphash;
+
+	SipHash24_Init(&ctx);
+	SipHash_SetKey(&ctx, key);
+	/*
+	 * Only address families that are compiled in have a case here; for
+	 * any other value no address bytes are fed to the hash.
+	 */
+	switch (inc->inc_flags & INC_ISIPV6) {
+#ifdef INET
+	case 0:
+		SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr));
+		break;
+#endif
+#ifdef INET6
+	case INC_ISIPV6:
+		SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
+		break;
+#endif
+	}
+	SipHash_Final((u_int8_t *)&siphash, &ctx);
+
+	return (siphash);
+}
+
+
+/*
+ * Validate a client-supplied TFO cookie against the installed keys.
+ *
+ *	inc		connection info; its foreign address is what is hashed
+ *	cookie		cookie bytes supplied by the client (only read when
+ *			len equals TCP_FASTOPEN_COOKIE_LEN)
+ *	len		length of 'cookie' in bytes
+ *	latest_cookie	out: the cookie derived from the newest key
+ *
+ * Return values:
+ *	-1	the cookie is invalid and no valid cookie is available
+ *		(*latest_cookie is not written)
+ *	 0	the cookie is invalid and the latest cookie has been returned
+ *	 1	the cookie is valid and the latest cookie has been returned
+ *
+ * When the acceptany sysctl is non-zero, 1 is returned unconditionally and
+ * *latest_cookie is set to 0.
+ */
+int
+tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+    unsigned int len, uint64_t *latest_cookie)
+{
+	struct rm_priotracker tracker;
+	unsigned int i, key_index;
+	uint64_t cur_cookie;
+	int rv;
+
+	if (V_tcp_fastopen_acceptany) {
+		*latest_cookie = 0;
+		return (1);
+	}
+
+	/*
+	 * Every access to the key ring, including the one on the
+	 * wrong-length path, is made under the read lock so that a
+	 * concurrent key install or removal is never observed half-done.
+	 */
+	TCP_FASTOPEN_KEYS_RLOCK(&tracker);
+
+	/*
+	 * With no keys installed there is nothing to validate against and
+	 * no cookie to hand back, whatever the length of the client cookie.
+	 */
+	if (V_tcp_fastopen_numkeys == 0) {
+		rv = -1;
+		goto out;
+	}
+
+	/* A cookie of the wrong length can never match. */
+	if (len != TCP_FASTOPEN_COOKIE_LEN) {
+		*latest_cookie = tcp_fastopen_make_cookie(
+		    V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], inc);
+		rv = 0;
+		goto out;
+	}
+
+	/*
+	 * Check against each available key, from newest to oldest.
+	 */
+	rv = 0;
+	key_index = V_tcp_fastopen_keys.newest;
+	for (i = 0; i < V_tcp_fastopen_numkeys; i++) {
+		cur_cookie =
+		    tcp_fastopen_make_cookie(V_tcp_fastopen_keys.key[key_index],
+			inc);
+		if (i == 0)
+			*latest_cookie = cur_cookie;
+		if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) {
+			rv = 1;
+			break;
+		}
+		if (key_index == 0)
+			key_index = TCP_FASTOPEN_MAX_KEYS - 1;
+		else
+			key_index--;
+	}
+out:
+	TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
+
+	return (rv);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.autokey.  Reports the current interval
+ * and, on a write, validates and stores the new one.  While TFO is enabled
+ * the auto-key callout is stopped when the interval goes from non-zero to
+ * zero, and (re)armed with the new interval whenever the new value is
+ * non-zero.
+ *
+ * Returns 0 on success, EINVAL if the new interval is too large, or the
+ * error from sysctl_handle_int().
+ */
+static int
+sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	unsigned int new;
+
+	new = V_tcp_fastopen_autokey;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {
+		/* Reject intervals for which new * hz would exceed INT_MAX. */
+		if (new > (INT_MAX / hz))
+			return (EINVAL);
+
+		TCP_FASTOPEN_KEYS_WLOCK();
+		if (V_tcp_fastopen_enabled) {
+			if (V_tcp_fastopen_autokey && !new)
+				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+			else if (new)
+				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+				    new * hz, tcp_fastopen_autokey_callout,
+				    &V_tcp_fastopen_autokey_ctx);
+		}
+		V_tcp_fastopen_autokey = new;
+		TCP_FASTOPEN_KEYS_WUNLOCK();
+	}
+
+	return (error);
+}
+
+/*
+ * Handler for net.inet.tcp.fastopen.enabled.  Reports the current setting
+ * and, on a write, acts only on an actual transition:
+ *
+ *	enabled -> disabled	all keys are dropped, the key ring index is
+ *				reset, and the auto-key callout is stopped
+ *				if auto-keying is configured
+ *	disabled -> enabled	if auto-keying is configured and no key is
+ *				installed, a key is generated immediately
+ *				and the auto-key callout is armed
+ *
+ * Any non-zero value written is stored as 1.  Returns the result of
+ * sysctl_handle_int().
+ *
+ * NOTE(review): V_tcp_fastopen_enabled is compared before the write lock
+ * is taken, so two concurrent writers could both take the same branch --
+ * presumably harmless; confirm.
+ */
+static int
+sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	unsigned int new;
+
+	new = V_tcp_fastopen_enabled;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {
+		if (V_tcp_fastopen_enabled && !new) {
+			/* enabled -> disabled */
+			TCP_FASTOPEN_KEYS_WLOCK();
+			V_tcp_fastopen_numkeys = 0;
+			V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+			if (V_tcp_fastopen_autokey)
+				callout_stop(&V_tcp_fastopen_autokey_ctx.c);
+			V_tcp_fastopen_enabled = 0;
+			TCP_FASTOPEN_KEYS_WUNLOCK();
+		} else if (!V_tcp_fastopen_enabled && new) {
+			/* disabled -> enabled */
+			TCP_FASTOPEN_KEYS_WLOCK();
+			if (V_tcp_fastopen_autokey &&
+			    (V_tcp_fastopen_numkeys == 0)) {
+				tcp_fastopen_autokey_locked();
+				callout_reset(&V_tcp_fastopen_autokey_ctx.c,
+				    V_tcp_fastopen_autokey * hz,
+				    tcp_fastopen_autokey_callout,
+				    &V_tcp_fastopen_autokey_ctx);
+			}
+			V_tcp_fastopen_enabled = 1;
+			TCP_FASTOPEN_KEYS_WUNLOCK();
+		}
+	}
+	return (error);
+}
+
+/*
+ * Handler for the write-only net.inet.tcp.fastopen.setkey.  Accepts
+ * exactly TCP_FASTOPEN_KEY_LEN bytes and installs them as the newest key
+ * under the key write lock.
+ *
+ * Returns 0 on success, EINVAL if the request asks for the old value or
+ * supplies a key of the wrong length, EPERM if no new value is supplied,
+ * or the error from SYSCTL_IN().
+ */
+static int
+sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS)
+{
+	int error;
+	uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
+
+	/* The key cannot be read back. */
+	if (req->oldptr != NULL || req->oldlen != 0)
+		return (EINVAL);
+	if (req->newptr == NULL)
+		return (EPERM);
+	if (req->newlen != sizeof(newkey))
+		return (EINVAL);
+	error = SYSCTL_IN(req, newkey, sizeof(newkey));
+	if (error)
+		return (error);
+
+	TCP_FASTOPEN_KEYS_WLOCK();
+	tcp_fastopen_addkey_locked(newkey);
+	TCP_FASTOPEN_KEYS_WUNLOCK();
+
+	return (0);
+}

Added: head/sys/netinet/tcp_fastopen.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/netinet/tcp_fastopen.h	Thu Dec 24 19:09:48 2015	(r292706)
@@ -0,0 +1,47 @@
+/*-
+ * Copyright (c) 2015 Patrick Kelsey
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _TCP_FASTOPEN_H_
+#define _TCP_FASTOPEN_H_
+
+#ifdef _KERNEL
+
+#define	TCP_FASTOPEN_COOKIE_LEN	8	/* tied to SipHash24 64-bit output */
+
+/* Per-vnet switch backing the net.inet.tcp.fastopen.enabled sysctl. */
+VNET_DECLARE(unsigned int, tcp_fastopen_enabled);
+#define	V_tcp_fastopen_enabled	VNET(tcp_fastopen_enabled)
+
+/* Set up / tear down the per-vnet TFO state. */
+void	tcp_fastopen_init(void);
+void	tcp_fastopen_destroy(void);
+/* Allocate a pending counter (starts at 1; NULL on failure). */
+unsigned int *tcp_fastopen_alloc_counter(void);
+/* Release one count; frees the counter when its value is 1. */
+void	tcp_fastopen_decrement_counter(unsigned int *counter);
+/*
+ * Validate a client cookie: returns 1 if valid, 0 if invalid, -1 if
+ * invalid and no cookie is available to return via latest_cookie.
+ */
+int	tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie,
+	    unsigned int len, uint64_t *latest_cookie);
+#endif /* _KERNEL */
+
+#endif /* _TCP_FASTOPEN_H_ */

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c	Thu Dec 24 18:53:17 2015	(r292705)
+++ head/sys/netinet/tcp_input.c	Thu Dec 24 19:09:48 2015	(r292706)
@@ -98,6 +98,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/nd6.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -999,7 +1002,8 @@ relocked:
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 #endif
 	if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
-	    (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
+	      (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
+	       !(tp->t_flags & TF_FASTOPEN)))) {
 		if (ti_locked == TI_UNLOCKED) {
 			if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
 				in_pcbref(inp);
@@ -1091,6 +1095,9 @@ relocked:
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 			}
+#ifdef TCP_RFC7413
+new_tfo_socket:
+#endif
 			if (so == NULL) {
 				/*
 				 * We completed the 3-way handshake
@@ -1353,7 +1360,12 @@ relocked:
 #endif
 		TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));
 		tcp_dooptions(&to, optp, optlen, TO_SYN);
+#ifdef TCP_RFC7413
+		if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+			goto new_tfo_socket;
+#else
 		syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+#endif
 		/*
 		 * Entry added to syncache and mbuf consumed.
 		 * Only the listen socket is unlocked by syncache_add().
@@ -1468,7 +1480,8 @@ tcp_do_segment(struct mbuf *m, struct tc
 	struct in_conninfo *inc;
 	struct mbuf *mfree;
 	struct tcpopt to;
-
+	int tfo_syn;
+	
 #ifdef TCPDEBUG
 	/*
 	 * The size of tcp_saveipgen must be the size of the max ip header,
@@ -1921,6 +1934,28 @@ tcp_do_segment(struct mbuf *m, struct tc
 				rstreason = BANDLIM_RST_OPENPORT;
 				goto dropwithreset;
 		}
+#ifdef TCP_RFC7413
+		if (tp->t_flags & TF_FASTOPEN) {
+			/*
+			 * When a TFO connection is in SYN_RECEIVED, the
+			 * only valid packets are the initial SYN, a
+			 * retransmit/copy of the initial SYN (possibly with
+			 * a subset of the original data), a valid ACK, a
+			 * FIN, or a RST.
+			 */
+			if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) {
+				rstreason = BANDLIM_RST_OPENPORT;
+				goto dropwithreset;
+			} else if (thflags & TH_SYN) {
+				/* non-initial SYN is ignored */
+				if ((tcp_timer_active(tp, TT_DELACK) || 
+				     tcp_timer_active(tp, TT_REXMT)))
+					goto drop;
+			} else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) {
+				goto drop;
+			}
+		}
+#endif
 		break;
 
 	/*
@@ -2136,7 +2171,8 @@ tcp_do_segment(struct mbuf *m, struct tc
 	 * RFC5961 Section 4.2
 	 * Send challenge ACK for any SYN in synchronized state.
 	 */
-	if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
+	if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
+	    tp->t_state != TCPS_SYN_RECEIVED) {
 		KASSERT(ti_locked == TI_RLOCKED,
 		    ("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@@ -2330,9 +2366,16 @@ tcp_do_segment(struct mbuf *m, struct tc
 	 */
 	if ((thflags & TH_ACK) == 0) {
 		if (tp->t_state == TCPS_SYN_RECEIVED ||
-		    (tp->t_flags & TF_NEEDSYN))
+		    (tp->t_flags & TF_NEEDSYN)) {
+#ifdef TCP_RFC7413
+			if (tp->t_state == TCPS_SYN_RECEIVED &&
+			    tp->t_flags & TF_FASTOPEN) {
+				tp->snd_wnd = tiwin;
+				cc_conn_init(tp);
+			}
+#endif
 			goto step6;
-		else if (tp->t_flags & TF_ACKNOW)
+		} else if (tp->t_flags & TF_ACKNOW)
 			goto dropafterack;
 		else
 			goto drop;
@@ -2371,7 +2414,27 @@ tcp_do_segment(struct mbuf *m, struct tc
 			tcp_state_change(tp, TCPS_ESTABLISHED);
 			TCP_PROBE5(accept__established, NULL, tp,
 			    mtod(m, const char *), tp, th);
-			cc_conn_init(tp);
+#ifdef TCP_RFC7413
+			if (tp->t_tfo_pending) {
+				tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+				tp->t_tfo_pending = NULL;
+
+				/*
+				 * Account for the ACK of our SYN prior to
+				 * regular ACK processing below.
+				 */ 
+				tp->snd_una++;
+			}
+			/*
+			 * TFO connections call cc_conn_init() during SYN
+			 * processing.  Calling it again here for such
+			 * connections is not harmless as it would undo the
+			 * snd_cwnd reduction that occurs when a TFO SYN|ACK
+			 * is retransmitted.
+			 */
+			if (!(tp->t_flags & TF_FASTOPEN))
+#endif
+				cc_conn_init(tp);
 			tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
 		}
 		/*
@@ -2919,7 +2982,9 @@ dodata:							/* XXX */
 	 * case PRU_RCVD).  If a FIN has already been received on this
 	 * connection then we just ignore the text.
 	 */
-	if ((tlen || (thflags & TH_FIN)) &&
+	tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
+		   (tp->t_flags & TF_FASTOPEN));
+	if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
 	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
 		tcp_seq save_start = th->th_seq;
 		m_adj(m, drop_hdrlen);	/* delayed header drop */
@@ -2937,8 +3002,9 @@ dodata:							/* XXX */
 		 */
 		if (th->th_seq == tp->rcv_nxt &&
 		    LIST_EMPTY(&tp->t_segq) &&
-		    TCPS_HAVEESTABLISHED(tp->t_state)) {
-			if (DELAY_ACK(tp, tlen))
+		    (TCPS_HAVEESTABLISHED(tp->t_state) ||
+		     tfo_syn)) {
+			if (DELAY_ACK(tp, tlen) || tfo_syn)
 				tp->t_flags |= TF_DELACK;
 			else
 				tp->t_flags |= TF_ACKNOW;
@@ -3293,6 +3359,21 @@ tcp_dooptions(struct tcpopt *to, u_char 
 			to->to_sacks = cp + 2;
 			TCPSTAT_INC(tcps_sack_rcv_blocks);
 			break;
+#ifdef TCP_RFC7413
+		case TCPOPT_FAST_OPEN:
+			/*
+			 * A TFO option is either empty (no cookie) or
+			 * carries a cookie, in which case its total length
+			 * must lie between TCPOLEN_FAST_OPEN_MIN and
+			 * TCPOLEN_FAST_OPEN_MAX inclusive.  Options of any
+			 * other length are ignored.
+			 */
+			if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) &&
+			    ((optlen < TCPOLEN_FAST_OPEN_MIN) ||
+			    (optlen > TCPOLEN_FAST_OPEN_MAX)))
+				continue;
+			/* Only honored on SYN segments and when enabled. */
+			if (!(flags & TO_SYN))
+				continue;
+			if (!V_tcp_fastopen_enabled)
+				continue;
+			to->to_flags |= TOF_FASTOPEN;
+			/* Cookie length excludes the kind and length bytes. */
+			to->to_tfo_len = optlen - 2;
+			to->to_tfo_cookie = to->to_tfo_len ? cp + 2 : NULL;
+			break;
+#endif
 		default:
 			continue;
 		}

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c	Thu Dec 24 18:53:17 2015	(r292705)
+++ head/sys/netinet/tcp_output.c	Thu Dec 24 19:09:48 2015	(r292706)
@@ -68,6 +68,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet/ip6.h>
 #include <netinet6/ip6_var.h>
 #endif
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #define	TCPOUTFLAGS
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
@@ -204,6 +207,17 @@ tcp_output(struct tcpcb *tp)
 		return (tcp_offload_output(tp));
 #endif
 
+#ifdef TCP_RFC7413
+	/*
+	 * For TFO connections in SYN_RECEIVED, only allow the initial
+	 * SYN|ACK and those sent by the retransmit timer.
+	 */
+	if ((tp->t_flags & TF_FASTOPEN) &&
+	    (tp->t_state == TCPS_SYN_RECEIVED) &&
+	    SEQ_GT(tp->snd_max, tp->snd_una) &&    /* initial SYN|ACK sent */
+	    (tp->snd_nxt != tp->snd_una))          /* not a retransmit */
+		return (0);
+#endif
 	/*
 	 * Determine length of data that should be transmitted,
 	 * and flags that will be used.
@@ -390,6 +404,15 @@ after_sack_rexmit:
 	if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
 		if (tp->t_state != TCPS_SYN_RECEIVED)
 			flags &= ~TH_SYN;
+#ifdef TCP_RFC7413
+		/*
+		 * When sending additional segments following a TFO SYN|ACK,
+		 * do not include the SYN bit.
+		 */
+		if ((tp->t_flags & TF_FASTOPEN) &&
+		    (tp->t_state == TCPS_SYN_RECEIVED))
+			flags &= ~TH_SYN;
+#endif
 		off--, len++;
 	}
 
@@ -403,6 +426,17 @@ after_sack_rexmit:
 		flags &= ~TH_FIN;
 	}
 
+#ifdef TCP_RFC7413
+	/*
+	 * When retransmitting SYN|ACK on a passively-created TFO socket,
+	 * don't include data, as the presence of data may have caused the
+	 * original SYN|ACK to have been dropped by a middlebox.
+	 */
+	if ((tp->t_flags & TF_FASTOPEN) &&
+	    (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) ||
+	     (flags & TH_RST)))
+		len = 0;
+#endif
 	if (len <= 0) {
 		/*
 		 * If FIN has been sent but not acked,
@@ -725,6 +759,22 @@ send:
 			tp->snd_nxt = tp->iss;
 			to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc);
 			to.to_flags |= TOF_MSS;
+#ifdef TCP_RFC7413
+			/*
+			 * Only include the TFO option on the first
+			 * transmission of the SYN|ACK on a
+			 * passively-created TFO socket, as the presence of
+			 * the TFO option may have caused the original
+			 * SYN|ACK to have been dropped by a middlebox.
+			 */
+			if ((tp->t_flags & TF_FASTOPEN) &&
+			    (tp->t_state == TCPS_SYN_RECEIVED) &&
+			    (tp->t_rxtshift == 0)) {
+				to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
+				to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie;
+				to.to_flags |= TOF_FASTOPEN;
+			}
+#endif
 		}
 		/* Window scaling. */
 		if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
@@ -1004,7 +1054,7 @@ send:
 		 * give data to the user when a buffer fills or
 		 * a PUSH comes in.)
 		 */
-		if (off + len == sbused(&so->so_snd))
+		if ((off + len == sbused(&so->so_snd)) && !(flags & TH_SYN))
 			flags |= TH_PUSH;
 		SOCKBUF_UNLOCK(&so->so_snd);
 	} else {
@@ -1711,6 +1761,25 @@ tcp_addoptions(struct tcpopt *to, u_char
 			TCPSTAT_INC(tcps_sack_send_blocks);
 			break;
 			}
+#ifdef TCP_RFC7413
+		case TOF_FASTOPEN:
+			{
+			int total_len;
+
+			/* XXX is there any point to aligning this option? */
+			total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
+			if (TCP_MAXOLEN - optlen < total_len)
+				continue;
+			*optp++ = TCPOPT_FAST_OPEN;
+			*optp++ = total_len;
+			if (to->to_tfo_len > 0) {
+				bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
+				optp += to->to_tfo_len;
+			}
+			optlen += total_len;
+			break;
+			}
+#endif
 		default:
 			panic("%s: unknown TCP option type", __func__);
 			break;

Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c	Thu Dec 24 18:53:17 2015	(r292705)
+++ head/sys/netinet/tcp_subr.c	Thu Dec 24 19:09:48 2015	(r292706)
@@ -84,6 +84,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/nd6.h>
 #endif
 
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -704,6 +707,10 @@ tcp_init(void)
 #ifdef TCPPCAP
 	tcp_pcap_init();
 #endif
+
+#ifdef TCP_RFC7413
+	tcp_fastopen_init();
+#endif
 }
 
 #ifdef VIMAGE
@@ -712,6 +719,9 @@ tcp_destroy(void)
 {
 	int error;
 
+#ifdef TCP_RFC7413
+	tcp_fastopen_destroy();
+#endif
 	tcp_hc_destroy();
 	syncache_destroy();
 	tcp_tw_destroy();
@@ -1439,6 +1449,17 @@ tcp_close(struct tcpcb *tp)
 	if (tp->t_state == TCPS_LISTEN)
 		tcp_offload_listen_stop(tp);
 #endif
+#ifdef TCP_RFC7413
+	/*
+	 * This releases the TFO pending counter resource for TFO listen
+	 * sockets as well as passively-created TFO sockets that transition
+	 * from SYN_RECEIVED to CLOSED.
+	 */
+	if (tp->t_tfo_pending) {
+		tcp_fastopen_decrement_counter(tp->t_tfo_pending);
+		tp->t_tfo_pending = NULL;
+	}
+#endif
 	in_pcbdrop(inp);
 	TCPSTAT_INC(tcps_closed);
 	KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));

Modified: head/sys/netinet/tcp_syncache.c
==============================================================================
--- head/sys/netinet/tcp_syncache.c	Thu Dec 24 18:53:17 2015	(r292705)
+++ head/sys/netinet/tcp_syncache.c	Thu Dec 24 19:09:48 2015	(r292706)
@@ -81,6 +81,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/tcp.h>
+#ifdef TCP_RFC7413
+#include <netinet/tcp_fastopen.h>
+#endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -1083,6 +1086,39 @@ failed:
 	return (0);
 }
 
+#ifdef TCP_RFC7413
+/*
+ * Create the socket for a TFO connection directly from a syncache entry.
+ *
+ * On entry *lsop is the listen socket; on return it is whatever
+ * syncache_socket() produced (NULL on failure).  On failure the listen
+ * socket's pending-TFO counter is decremented.  On success the new tcpcb is
+ * marked TF_FASTOPEN, remembers the cookie to send back, has snd_max and
+ * snd_nxt set to iss, and shares the listen socket's pending counter.
+ */
+static void
+syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+    uint64_t response_cookie)
+{
+	struct tcpcb *newtp;
+	unsigned int *tfo_pending;
+
+	/*
+	 * Global TCP locks are held because we manipulate the PCB lists
+	 * and create a new socket.
+	 */
+	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+	/* Fetch the counter before *lsop is replaced below. */
+	tfo_pending = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
+	*lsop = syncache_socket(sc, *lsop, m);
+	if (*lsop == NULL) {
+		TCPSTAT_INC(tcps_sc_aborted);
+		atomic_subtract_int(tfo_pending, 1);
+		return;
+	}
+
+	newtp = intotcpcb(sotoinpcb(*lsop));
+	newtp->t_flags |= TF_FASTOPEN;
+	newtp->t_tfo_cookie = response_cookie;
+	newtp->snd_max = newtp->iss;
+	newtp->snd_nxt = newtp->iss;
+	newtp->t_tfo_pending = tfo_pending;
+	TCPSTAT_INC(tcps_sc_completed);
+}
+#endif /* TCP_RFC7413 */
+
 /*
  * Given a LISTEN socket and an inbound SYN request, add
  * this to the syn cache, and send back a segment:
@@ -1095,8 +1131,15 @@ failed:
  * DoS attack, an attacker could send data which would eventually
  * consume all available buffer space if it were ACKed.  By not ACKing
  * the data, we avoid this DoS scenario.
+ *
+ * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
+ * cookie is processed, V_tcp_fastopen_enabled set to true, and the
+ * TCP_FASTOPEN socket option is set.  In this case, a new socket is created
+ * and returned via lsop, the mbuf is not freed so that tcp_input() can
+ * queue its data to the socket, and 1 is returned to indicate the
+ * TFO-socket-creation path was taken.
  */
-void
+int
 syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
     void *todctx)
@@ -1109,6 +1152,7 @@ syncache_add(struct in_conninfo *inc, st
 	u_int ltflags;
 	int win, sb_hiwat, ip_ttl, ip_tos;
 	char *s;
+	int rv = 0;
 #ifdef INET6
 	int autoflowlabel = 0;
 #endif
@@ -1117,6 +1161,11 @@ syncache_add(struct in_conninfo *inc, st
 #endif
 	struct syncache scs;
 	struct ucred *cred;
+#ifdef TCP_RFC7413
+	uint64_t tfo_response_cookie;
+	int tfo_cookie_valid = 0;
+	int tfo_response_cookie_valid = 0;
+#endif
 
 	INP_WLOCK_ASSERT(inp);			/* listen socket */
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
@@ -1141,6 +1190,29 @@ syncache_add(struct in_conninfo *inc, st
 	sb_hiwat = so->so_rcv.sb_hiwat;
 	ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
 
+#ifdef TCP_RFC7413
+	if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) &&
+	    (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
+		/*
+		 * Limit the number of pending TFO connections to
+		 * approximately half of the queue limit.  This prevents TFO
+		 * SYN floods from starving the service by filling the
+		 * listen queue with bogus TFO connections.
+		 */
+		if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201512241909.tBOJ9nlX070850>