From owner-svn-src-stable-11@freebsd.org Mon Aug 29 18:00:15 2016 Return-Path: Delivered-To: svn-src-stable-11@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 69299BC7B0D; Mon, 29 Aug 2016 18:00:15 +0000 (UTC) (envelope-from hiren@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 4377EBD8; Mon, 29 Aug 2016 18:00:15 +0000 (UTC) (envelope-from hiren@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u7TI0Ec0037463; Mon, 29 Aug 2016 18:00:14 GMT (envelope-from hiren@FreeBSD.org) Received: (from hiren@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u7TI0E3m037460; Mon, 29 Aug 2016 18:00:14 GMT (envelope-from hiren@FreeBSD.org) Message-Id: <201608291800.u7TI0E3m037460@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: hiren set sender to hiren@FreeBSD.org using -f From: Hiren Panchasara Date: Mon, 29 Aug 2016 18:00:14 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r305008 - in stable/11/sys: netinet netinet6 X-SVN-Group: stable-11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable-11@freebsd.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: SVN commit messages for only the 11-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 29 Aug 2016 18:00:15 -0000 Author: hiren Date: Mon Aug 29 18:00:14 2016 New Revision: 305008 URL: https://svnweb.freebsd.org/changeset/base/305008 Log: MFC r303626 (by gallatin) Rework IPV6 TCP path MTU discovery to match IPv4. No objection: gallatin Modified: stable/11/sys/netinet/tcp_subr.c stable/11/sys/netinet6/icmp6.c stable/11/sys/netinet6/ip6_output.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/netinet/tcp_subr.c ============================================================================== --- stable/11/sys/netinet/tcp_subr.c Mon Aug 29 17:50:36 2016 (r305007) +++ stable/11/sys/netinet/tcp_subr.c Mon Aug 29 18:00:14 2016 (r305008) @@ -78,6 +78,7 @@ __FBSDID("$FreeBSD$"); #include #include #ifdef INET6 +#include #include #include #include @@ -2040,72 +2041,146 @@ tcp_ctlinput(int cmd, struct sockaddr *s void tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) { - struct tcphdr th; + struct in6_addr *dst; + struct tcphdr *th; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; struct ip6_hdr *ip6; struct mbuf *m; + struct inpcb *inp; + struct tcpcb *tp; + struct icmp6_hdr *icmp6; struct ip6ctlparam *ip6cp = NULL; const struct sockaddr_in6 *sa6_src = NULL; - int off; - struct tcp_portonly { - u_int16_t th_sport; - u_int16_t th_dport; - } *thp; + struct in_conninfo inc; + tcp_seq icmp_tcp_seq; + unsigned int mtu; + unsigned int off; + if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; - if (cmd == PRC_MSGSIZE) - notify = tcp_mtudisc_notify; - else if (!PRC_IS_REDIRECT(cmd) && - ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)) - return; - /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; + icmp6 = ip6cp->ip6c_icmp6; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; sa6_src = ip6cp->ip6c_src; + dst = ip6cp->ip6c_finaldst; } else { m = NULL; ip6 = NULL; off = 0; /* fool gcc */ sa6_src = &sa6_any; + dst = NULL; } - if (ip6 != NULL) { - struct in_conninfo inc; - /* - * XXX: We assume that when IPV6 is non NULL, - * M and OFF are valid. - */ + if (cmd == PRC_MSGSIZE) + notify = tcp_mtudisc_notify; + else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || + cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && + ip6 != NULL) + notify = tcp_drop_syn_sent; - /* check if we can safely examine src and dst ports */ - if (m->m_pkthdr.len < off + sizeof(*thp)) - return; + /* + * Hostdead is ugly because it goes linearly through all PCBs. + * XXX: We never get this from ICMP, otherwise it makes an + * excellent DoS attack on machines with many connections. + */ + else if (cmd == PRC_HOSTDEAD) + ip6 = NULL; + else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0) + return; - bzero(&th, sizeof(th)); - m_copydata(m, off, sizeof(*thp), (caddr_t)&th); + if (ip6 == NULL) { + in6_pcbnotify(&V_tcbinfo, sa, 0, + (const struct sockaddr *)sa6_src, + 0, cmd, NULL, notify); + return; + } - in6_pcbnotify(&V_tcbinfo, sa, th.th_dport, - (struct sockaddr *)ip6cp->ip6c_src, - th.th_sport, cmd, NULL, notify); + /* Check if we can safely get the ports from the tcp hdr */ + if (m == NULL || + (m->m_pkthdr.len < + (int32_t) (off + offsetof(struct tcphdr, th_seq)))) { + return; + } + th = (struct tcphdr *) mtodo(ip6cp->ip6c_m, ip6cp->ip6c_off); + INP_INFO_RLOCK(&V_tcbinfo); + inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, th->th_dport, + &ip6->ip6_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL); + if (inp != NULL && PRC_IS_REDIRECT(cmd)) { + /* signal EHOSTDOWN, as it flushes the cached route */ + inp = (*notify)(inp, EHOSTDOWN); + if (inp != NULL) + INP_WUNLOCK(inp); + } else if (inp != NULL) { + if (!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED) && + !(inp->inp_socket == NULL)) { + icmp_tcp_seq = ntohl(th->th_seq); + tp = intotcpcb(inp); + if (SEQ_GEQ(icmp_tcp_seq, tp->snd_una) && + SEQ_LT(icmp_tcp_seq, tp->snd_max)) { + if (cmd == PRC_MSGSIZE) { + /* + * MTU discovery: + * If we got a needfrag set the MTU + * in the route to the suggested new + * value (if given) and then notify. + */ + mtu = ntohl(icmp6->icmp6_mtu); + /* + * If no alternative MTU was + * proposed, or the proposed + * MTU was too small, set to + * the min. + */ + if (mtu < IPV6_MMTU) + mtu = IPV6_MMTU - 8; + + + bzero(&inc, sizeof(inc)); + inc.inc_fibnum = M_GETFIB(m); + inc.inc_flags |= INC_ISIPV6; + inc.inc6_faddr = *dst; + if (in6_setscope(&inc.inc6_faddr, + m->m_pkthdr.rcvif, NULL)) + goto unlock_inp; + + /* + * Only process the offered MTU if it + * is smaller than the current one. + */ + if (mtu < tp->t_maxseg + + (sizeof (*th) + sizeof (*ip6))) { + tcp_hc_updatemtu(&inc, mtu); + tcp_mtudisc(inp, mtu); + ICMP6STAT_INC(icp6s_pmtuchg); + } + } else + inp = (*notify)(inp, + inet6ctlerrmap[cmd]); + } + } +unlock_inp: + if (inp != NULL) + INP_WUNLOCK(inp); + } else { bzero(&inc, sizeof(inc)); - inc.inc_fport = th.th_dport; - inc.inc_lport = th.th_sport; - inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr; - inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr; + inc.inc_fibnum = M_GETFIB(m); inc.inc_flags |= INC_ISIPV6; - INP_INFO_RLOCK(&V_tcbinfo); - syncache_unreach(&inc, &th); - INP_INFO_RUNLOCK(&V_tcbinfo); - } else - in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src, - 0, cmd, NULL, notify); + inc.inc_fport = th->th_dport; + inc.inc_lport = th->th_sport; + inc.inc6_faddr = *dst; + inc.inc6_laddr = ip6->ip6_src; + syncache_unreach(&inc, th); + } + INP_INFO_RUNLOCK(&V_tcbinfo); } #endif /* INET6 */ Modified: stable/11/sys/netinet6/icmp6.c ============================================================================== --- stable/11/sys/netinet6/icmp6.c Mon Aug 29 17:50:36 2016 (r305007) +++ stable/11/sys/netinet6/icmp6.c Mon Aug 29 18:00:14 2016 (r305008) @@ -485,15 +485,13 @@ icmp6_input(struct mbuf **mp, int *offp, icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: + case ICMP6_DST_UNREACH_ADDR: /* PRC_HOSTDEAD is a DOS */ code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; - case ICMP6_DST_UNREACH_ADDR: - code = PRC_HOSTDEAD; - break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; Modified: stable/11/sys/netinet6/ip6_output.c ============================================================================== --- stable/11/sys/netinet6/ip6_output.c Mon Aug 29 17:50:36 2016 (r305007) +++ stable/11/sys/netinet6/ip6_output.c Mon Aug 29 18:00:14 2016 (r305008) @@ -150,9 +150,10 @@ static int ip6_insertfraghdr(struct mbuf static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t); static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *); static int ip6_getpmtu(struct route_in6 *, int, - struct ifnet *, const struct in6_addr *, u_long *, int *, u_int); + struct ifnet *, const struct in6_addr *, u_long *, int *, u_int, + u_int); static int ip6_calcmtu(struct ifnet *, const struct in6_addr *, u_long, - u_long *, int *); + u_long *, int *, u_int); static int ip6_getpmtu_ctl(u_int, const struct in6_addr *, u_long *); static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int); @@ -718,7 +719,7 @@ again: /* Determine path MTU. */ if ((error = ip6_getpmtu(ro_pmtu, ro != ro_pmtu, ifp, &ip6->ip6_dst, - &mtu, &alwaysfrag, fibnum)) != 0) + &mtu, &alwaysfrag, fibnum, *nexthdrp)) != 0) goto bad; /* @@ -1251,7 +1252,7 @@ ip6_getpmtu_ctl(u_int fibnum, const stru ifp = nh6.nh_ifp; mtu = nh6.nh_mtu; - error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL); + error = ip6_calcmtu(ifp, dst, mtu, mtup, NULL, 0); fib6_free_nh_ext(fibnum, &nh6); return (error); @@ -1270,7 +1271,7 @@ ip6_getpmtu_ctl(u_int fibnum, const stru static int ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, struct ifnet *ifp, const struct in6_addr *dst, u_long *mtup, - int *alwaysfragp, u_int fibnum) + int *alwaysfragp, u_int fibnum, u_int proto) { struct nhop6_basic nh6; struct in6_addr kdst; @@ -1308,7 +1309,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, i if (ro_pmtu->ro_rt) mtu = ro_pmtu->ro_rt->rt_mtu; - return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp)); + return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); } /* @@ -1320,7 +1321,7 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, i */ static int ip6_calcmtu(struct ifnet *ifp, const struct in6_addr *dst, u_long rt_mtu, - u_long *mtup, int *alwaysfragp) + u_long *mtup, int *alwaysfragp, u_int proto) { u_long mtu = 0; int alwaysfrag = 0; @@ -1335,7 +1336,11 @@ ip6_calcmtu(struct ifnet *ifp, const str inc.inc6_faddr = *dst; ifmtu = IN6_LINKMTU(ifp); - mtu = tcp_hc_getmtu(&inc); + + /* TCP is known to react to pmtu changes so skip hc */ + if (proto != IPPROTO_TCP) + mtu = tcp_hc_getmtu(&inc); + if (mtu) mtu = min(mtu, rt_mtu); else