Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 22 Feb 2001 18:54:12 -0600
From:      Jonathan Lemon <jlemon@flugsvamp.com>
To:        net@freebsd.org
Subject:   ICMP unreachables, take II.
Message-ID:  <20010222185412.E5714@prism.flugsvamp.com>

next in thread | raw e-mail | index | archive | help

I recently had a bug report regarding kqueue, where the kevent() call
for a TCP socket would return because so_error was set, but the
connection was still valid.

The cause of this was because a non-blocking connect() call was made,
and then the socket was monitored for writability.  However, an ICMP
error was returned, which eventually (after several retransmits) caused
so_error to be set in tcp_notify() without changing the connection state.

However, it doesn't really seem reasonable to leave the connection
pending in a SYN_SENT state at this point.  From the user's perspective,
the select/kevent call returns, indicating writability, but the next
operation (probably write) would fail, returning the contents of so_error.

I would propose that instead of this behavior, the connection should 
be dropped instead.


Also, RFC 1122 indicates that the application layer SHOULD report
soft errors, and it seems that a half-hearted attempt is made in 
tcp_notify(), by calling so{rw}wakeup.

However, this also seems to be incorrect, since select will not return
and any tests performed on the socket state will show no change, so it
seems that the wakeup calls should be removed.

Also, (still reading?) while I'm in this bit of code, we currently
react to all ICMP unreachable errors during setup of a connection;
this is incorrect, only port/protocol and administrative icmp subtypes
should be valid, so fix this as well.  In this case, return ENETRESET
as the error code to the user layer.


Finally, ICMP errors should never be allowed to kill an existing TCP
connection; if an administrative filter is installed across some existing
flows, then those flows should be allowed to time out per the TCP protocol.

Patch attached, please review.
--
Jonathan


Index: ip_icmp.c
===================================================================
RCS file: /ncvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.52
diff -u -r1.52 ip_icmp.c
--- ip_icmp.c	2001/02/18 09:34:51	1.52
+++ ip_icmp.c	2001/02/22 23:48:25
@@ -315,67 +315,35 @@
 	case ICMP_UNREACH:
 		switch (code) {
 			case ICMP_UNREACH_NET:
-				code = PRC_UNREACH_HOST;
-				break;
-
 			case ICMP_UNREACH_HOST:
-				code = PRC_UNREACH_HOST;
-				break;
-
-			case ICMP_UNREACH_PROTOCOL:
-				code = PRC_UNREACH_HOST;
-				break;
-
-			case ICMP_UNREACH_PORT:
-				code = PRC_UNREACH_HOST;
-				break;
-
 			case ICMP_UNREACH_SRCFAIL:
-				code = PRC_UNREACH_HOST;
+			case ICMP_UNREACH_NET_UNKNOWN:
+			case ICMP_UNREACH_HOST_UNKNOWN:
+			case ICMP_UNREACH_ISOLATED:
+			case ICMP_UNREACH_TOSNET:
+			case ICMP_UNREACH_TOSHOST:
+			case ICMP_UNREACH_HOST_PRECEDENCE:
+			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
+				code = PRC_UNREACH_NET;
 				break;
 
 			case ICMP_UNREACH_NEEDFRAG:
 				code = PRC_MSGSIZE;
 				break;
 
-			case ICMP_UNREACH_NET_UNKNOWN:
-				code = PRC_UNREACH_HOST;
-				break;
-
-			case ICMP_UNREACH_NET_PROHIB:
+			/*
+			 * RFC 1122, Sections 3.2.2.1 and 4.2.3.9.
+			 * Treat subcodes 2,3 as immediate RST
+			 */
+			case ICMP_UNREACH_PROTOCOL:
+			case ICMP_UNREACH_PORT:
 				code = PRC_UNREACH_ADMIN_PROHIB;
 				break;
 
-			case ICMP_UNREACH_TOSNET:
-				code = PRC_UNREACH_HOST;
-				break;
-
-			case ICMP_UNREACH_HOST_UNKNOWN:
-				code = PRC_UNREACH_HOST;
-				break;
-
-			case ICMP_UNREACH_ISOLATED:
-				code = PRC_UNREACH_HOST;
-				break;
-
+			case ICMP_UNREACH_NET_PROHIB:
 			case ICMP_UNREACH_HOST_PROHIB:
-				code = PRC_UNREACH_ADMIN_PROHIB;
-				break;
-
-			case ICMP_UNREACH_TOSHOST:
-				code = PRC_UNREACH_HOST;
-				break;
-
 			case ICMP_UNREACH_FILTER_PROHIB:
 				code = PRC_UNREACH_ADMIN_PROHIB;
-				break;
-
-			case ICMP_UNREACH_HOST_PRECEDENCE:
-				code = PRC_UNREACH_HOST;
-				break;
-
-			case ICMP_UNREACH_PRECEDENCE_CUTOFF:
-				code = PRC_UNREACH_HOST;
 				break;
 
 			default:
Index: ip_input.c
===================================================================
RCS file: /ncvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.154
diff -u -r1.154 ip_input.c
--- ip_input.c	2001/02/21 16:59:47	1.154
+++ ip_input.c	2001/02/23 00:14:39
@@ -1429,7 +1429,7 @@
 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
 	0,		0,		0,		0,
-	ENOPROTOOPT
+	ENOPROTOOPT,	ENETRESET
 };
 
 /*
Index: tcp_subr.c
===================================================================
RCS file: /ncvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.91
diff -u -r1.91 tcp_subr.c
--- tcp_subr.c	2001/02/22 21:23:45	1.91
+++ tcp_subr.c	2001/02/22 23:43:33
@@ -134,32 +134,9 @@
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_RD, 
     &tcbinfo.ipi_count, 0, "Number of active PCBs");
 
-/*
- * Treat ICMP unreachables like a TCP RST as required by rfc1122 section 3.2.2.1
- *
- * Administatively prohibited kill's sessions regardless of
- * their current state, other unreachable by default only kill 
- * sessions if they are in SYN-SENT state, this ensure temporary 
- * routing problems doesn't kill existing TCP sessions.
- * This can be overridden by icmp_like_rst_syn_sent_only.
- */
- 
-static int	icmp_unreach_like_rst = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_unreach_like_rst, CTLFLAG_RW,
-	&icmp_unreach_like_rst, 0, 
-	"Treat ICMP unreachable messages like TCP RST, rfc1122 section 3.2.2.1");
-
-/*
- * Control if ICMP unreachable messages other that administratively prohibited
- * ones will kill sessions not in SYN-SENT state.
- *
- * Has no effect unless icmp_unreach_like_rst is enabled.
- */
-
-static int	icmp_like_rst_syn_sent_only = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_like_rst_syn_sent_only, CTLFLAG_RW,
-	&icmp_like_rst_syn_sent_only, 0, 
-	"When icmp_unreach_like_rst is enabled, only act on sessions in SYN-SENT state");
+static int	icmp_may_rst = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_RW, &icmp_may_rst, 0, 
+    "Certain ICMP unreachable messages may abort connections in SYN_SENT");
 
 static void	tcp_cleartaocache __P((void));
 static void	tcp_notify __P((struct inpcb *, int));
@@ -775,14 +752,16 @@
  * Notify a tcp user of an asynchronous error;
  * store error as soft error, but wake up user
  * (for now, won't do anything until can select for soft error).
+ *
+ * Do not wake up user since there currently is no mechanism for
+ * reporting soft errors.
  */
 static void
 tcp_notify(inp, error)
 	struct inpcb *inp;
 	int error;
 {
-	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
-	register struct socket *so = inp->inp_socket;
+	struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
 
 	/*
 	 * Ignore some errors if we are hooked up.
@@ -797,12 +776,14 @@
 		return;
 	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
 	    tp->t_softerror)
-		so->so_error = error;
+		tcp_drop(tp, error);
 	else
 		tp->t_softerror = error;
+#if 0
 	wakeup((caddr_t) &so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
+#endif
 }
 
 static int
@@ -993,35 +974,17 @@
 	struct sockaddr *sa;
 	void *vip;
 {
-	register struct ip *ip = vip;
-	register struct tcphdr *th;
+	struct ip *ip = vip;
+	struct tcphdr *th;
 	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
 	tcp_seq tcp_sequence = 0;
 	int tcp_seq_check = 0;
 
 	if (cmd == PRC_QUENCH)
 		notify = tcp_quench;
-	else if ((icmp_unreach_like_rst == 1) && ((cmd == PRC_UNREACH_HOST) ||
-			(cmd == PRC_UNREACH_ADMIN_PROHIB)) && (ip) && 
-			((IP_VHL_HL(ip->ip_vhl) << 2) == sizeof(struct ip))) {
-		/*
-		 * Only go here if the length of the IP header in the ICMP packet
-		 * is 20 bytes, that is it doesn't have options, if it does have
-		 * options, we will not have the first 8 bytes of the TCP header,
-		 * and thus we cannot match against TCP source/destination port
-		 * numbers and TCP sequence number.
-		 *
-		 * If PRC_UNREACH_ADMIN_PROHIB drop session regardsless of current
-		 * state, else we check the sysctl icmp_like_rst_syn_sent_only to
-		 * see if we should drop the session only in SYN-SENT state, or
-		 * in all states.
-		 */
+	else if (icmp_may_rst && cmd == PRC_UNREACH_ADMIN_PROHIB && ip) {
 		tcp_seq_check = 1;
-		if (cmd == PRC_UNREACH_ADMIN_PROHIB) {
-			notify = tcp_drop_all_states;
-		} else {
-			notify = tcp_drop_syn_sent;
-		}
+		notify = tcp_drop_syn_sent;
 	} else if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc;
 	else if (PRC_IS_REDIRECT(cmd)) {
@@ -1173,10 +1136,9 @@
 }
 
 /*
- * When a ICMP unreachable is recieved, drop the
- * TCP connection, depending on the sysctl
- * icmp_like_rst_syn_sent_only, it only drops
- * the session if it's in SYN-SENT state
+ * When a specific ICMP unreachable message is received and the
+ * connection state is SYN-SENT, drop the connection.  This behavior
+ * is controlled by the icmp_may_rst sysctl.
  */
 void
 tcp_drop_syn_sent(inp, errno)
@@ -1184,22 +1146,8 @@
 	int errno;
 {
 	struct tcpcb *tp = intotcpcb(inp);
-	if((tp) && ((icmp_like_rst_syn_sent_only == 0) || 
-			(tp->t_state == TCPS_SYN_SENT)))
-		tcp_drop(tp, errno);
-}
 
-/*
- * When a ICMP unreachable is recieved, drop the
- * TCP connection, regardless of the state.
- */
-void
-tcp_drop_all_states(inp, errno)
-	struct inpcb *inp;
-	int errno;
-{
-	struct tcpcb *tp = intotcpcb(inp);
-	if(tp)
+	if (tp && tp->t_state == TCPS_SYN_SENT)
 		tcp_drop(tp, errno);
 }
 

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010222185412.E5714>