Date: Thu, 23 Oct 2003 19:52:15 -0700 (PDT)
From: Sam Leffler <sam@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 40358 for review
Message-ID: <200310240252.h9O2qF2g012941@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=40358 Change 40358 by sam@sam_ebb on 2003/10/23 19:51:36 revert tcp hostcache changes so they can be incorporated on a separate branch Affected files ... .. //depot/projects/netperf/sys/conf/files#17 edit .. //depot/projects/netperf/sys/net/if_arcsubr.c#3 edit .. //depot/projects/netperf/sys/net/if_ef.c#3 edit .. //depot/projects/netperf/sys/net/if_ethersubr.c#9 edit .. //depot/projects/netperf/sys/net/if_faith.c#6 edit .. //depot/projects/netperf/sys/net/if_fddisubr.c#4 edit .. //depot/projects/netperf/sys/net/if_iso88025subr.c#4 edit .. //depot/projects/netperf/sys/net/if_loop.c#9 edit .. //depot/projects/netperf/sys/net/if_ppp.c#4 edit .. //depot/projects/netperf/sys/net/route.c#18 edit .. //depot/projects/netperf/sys/net/route.h#8 edit .. //depot/projects/netperf/sys/net/rtsock.c#7 edit .. //depot/projects/netperf/sys/netatalk/ddp_output.c#4 edit .. //depot/projects/netperf/sys/netinet/icmp_var.h#3 edit .. //depot/projects/netperf/sys/netinet/in_pcb.c#7 edit .. //depot/projects/netperf/sys/netinet/in_pcb.h#8 edit .. //depot/projects/netperf/sys/netinet/in_rmx.c#9 edit .. //depot/projects/netperf/sys/netinet/in_var.h#4 edit .. //depot/projects/netperf/sys/netinet/ip_divert.c#8 edit .. //depot/projects/netperf/sys/netinet/ip_flow.c#6 add .. //depot/projects/netperf/sys/netinet/ip_flow.h#5 add .. //depot/projects/netperf/sys/netinet/ip_fw.h#3 edit .. //depot/projects/netperf/sys/netinet/ip_fw2.c#12 edit .. //depot/projects/netperf/sys/netinet/ip_icmp.c#7 edit .. //depot/projects/netperf/sys/netinet/ip_input.c#15 edit .. //depot/projects/netperf/sys/netinet/ip_output.c#11 edit .. //depot/projects/netperf/sys/netinet/ip_var.h#9 edit .. //depot/projects/netperf/sys/netinet/raw_ip.c#8 edit .. //depot/projects/netperf/sys/netinet/tcp.h#3 edit .. //depot/projects/netperf/sys/netinet/tcp_input.c#7 edit .. //depot/projects/netperf/sys/netinet/tcp_output.c#4 edit .. //depot/projects/netperf/sys/netinet/tcp_subr.c#6 edit .. 
//depot/projects/netperf/sys/netinet/tcp_syncache.c#7 edit .. //depot/projects/netperf/sys/netinet/tcp_timer.c#3 edit .. //depot/projects/netperf/sys/netinet/tcp_usrreq.c#4 edit .. //depot/projects/netperf/sys/netinet/tcp_var.h#3 edit .. //depot/projects/netperf/sys/netinet/udp_usrreq.c#7 edit .. //depot/projects/netperf/sys/netinet6/icmp6.c#11 edit .. //depot/projects/netperf/sys/netinet6/in6_pcb.c#10 edit .. //depot/projects/netperf/sys/netinet6/in6_rmx.c#10 edit .. //depot/projects/netperf/sys/netinet6/in6_src.c#13 edit .. //depot/projects/netperf/sys/netinet6/ip6_forward.c#11 edit .. //depot/projects/netperf/sys/netinet6/ip6_input.c#13 edit .. //depot/projects/netperf/sys/netinet6/ip6_output.c#18 edit .. //depot/projects/netperf/sys/netinet6/raw_ip6.c#5 edit .. //depot/projects/netperf/sys/netinet6/udp6_output.c#4 edit .. //depot/projects/netperf/sys/netipx/ipx_input.c#5 edit .. //depot/projects/netperf/sys/netipx/ipx_outputfl.c#3 edit Differences ... ==== //depot/projects/netperf/sys/conf/files#17 (text+ko) ==== @@ -1424,7 +1424,7 @@ netinet/ip_ecn.c optional inet6 netinet/ip_encap.c optional inet netinet/ip_encap.c optional inet6 -netinet/ip_fastforward.c optional inet +netinet/ip_flow.c optional inet netinet/ip_fw2.c optional ipfirewall netinet/ip_icmp.c optional inet netinet/ip_input.c optional inet @@ -1432,7 +1432,6 @@ netinet/ip_output.c optional inet netinet/raw_ip.c optional inet netinet/tcp_debug.c optional tcpdebug -netinet/tcp_hostcache.c optional inet netinet/tcp_input.c optional inet netinet/tcp_output.c optional inet netinet/tcp_subr.c optional inet ==== //depot/projects/netperf/sys/net/if_arcsubr.c#3 (text+ko) ==== @@ -543,14 +543,14 @@ #ifdef INET case ARCTYPE_IP: m_adj(m, ARC_HDRNEWLEN); - if (ip_fastforward(m)) + if (ipflow_fastforward(m)) return; isr = NETISR_IP; break; case ARCTYPE_IP_OLD: m_adj(m, ARC_HDRLEN); - if (ip_fastforward(m)) + if (ipflow_fastforward(m)) return; isr = NETISR_IP; break; ==== 
//depot/projects/netperf/sys/net/if_ef.c#3 (text+ko) ==== @@ -252,8 +252,8 @@ #endif #ifdef INET case ETHERTYPE_IP: - if (ip_fastforward(m)) - return; + if (ipflow_fastforward(m)) + return (0); isr = NETISR_IP; break; ==== //depot/projects/netperf/sys/net/if_ethersubr.c#9 (text+ko) ==== @@ -717,7 +717,7 @@ switch (ether_type) { #ifdef INET case ETHERTYPE_IP: - if (ip_fastforward(m)) + if (ipflow_fastforward(m)) return; isr = NETISR_IP; break; ==== //depot/projects/netperf/sys/net/if_faith.c#6 (text+ko) ==== @@ -271,8 +271,17 @@ struct rt_addrinfo *info; { RT_LOCK_ASSERT(rt); - if (rt) - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + + if (rt) { + rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ + /* + * For optimal performance, the send and receive buffers + * should be at least twice the MTU plus a little more for + * overhead. + */ + rt->rt_rmx.rmx_recvpipe = + rt->rt_rmx.rmx_sendpipe = 3 * FAITHMTU; + } } /* ==== //depot/projects/netperf/sys/net/if_fddisubr.c#4 (text+ko) ==== @@ -471,7 +471,7 @@ switch (type) { #ifdef INET case ETHERTYPE_IP: - if (ip_fastforward(m)) + if (ipflow_fastforward(m)) return; isr = NETISR_IP; break; ==== //depot/projects/netperf/sys/net/if_iso88025subr.c#4 (text+ko) ==== @@ -556,7 +556,7 @@ #ifdef INET case ETHERTYPE_IP: th->iso88025_shost[0] &= ~(TR_RII); - if (ip_fastforward(m)) + if (ipflow_fastforward(m)) return; isr = NETISR_IP; break; ==== //depot/projects/netperf/sys/net/if_loop.c#9 (text+ko) ==== @@ -357,8 +357,17 @@ struct rt_addrinfo *info; { RT_LOCK_ASSERT(rt); - if (rt) - rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; + + if (rt) { + rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ + /* + * For optimal performance, the send and receive buffers + * should be at least twice the MTU plus a little more for + * overhead. 
+ */ + rt->rt_rmx.rmx_recvpipe = + rt->rt_rmx.rmx_sendpipe = 3 * LOMTU; + } } /* ==== //depot/projects/netperf/sys/net/if_ppp.c#4 (text+ko) ==== @@ -1538,8 +1538,8 @@ m->m_pkthdr.len -= PPP_HDRLEN; m->m_data += PPP_HDRLEN; m->m_len -= PPP_HDRLEN; - if (ip_fastforward(m)) - return; + if (ipflow_fastforward(m)) + return; isr = NETISR_IP; break; #endif ==== //depot/projects/netperf/sys/net/route.c#18 (text+ko) ==== @@ -139,7 +139,7 @@ */ newrt = rt = (struct rtentry *)rn; nflags = rt->rt_flags & ~ignflags; - if (report && (nflags & RTF_CLONING)) { + if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) { /* * We are apparently adding (report = 0 in delete). * If it requires that it be cloned, do so. @@ -548,7 +548,7 @@ */ if (flags & RTF_HOST) { netmask = 0; - flags &= ~RTF_CLONING; + flags &= ~(RTF_CLONING | RTF_PRCLONING); } switch (req) { case RTM_DELETE: @@ -570,7 +570,7 @@ * Now search what's left of the subtree for any cloned * routes which might have been formed from this node. */ - if ((rt->rt_flags & RTF_CLONING) && + if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) && rt_mask(rt)) { rnh->rnh_walktree_from(rnh, dst, rt_mask(rt), rt_fixdelete, rt); @@ -617,7 +617,7 @@ ifa = rt->rt_ifa; /* XXX locking? */ flags = rt->rt_flags & - ~(RTF_CLONING | RTF_STATIC); + ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC); flags |= RTF_WASCLONED; gateway = rt->rt_gateway; if ((netmask = rt->rt_genmask) == 0) @@ -678,11 +678,11 @@ /* * Uh-oh, we already have one of these in the tree. * We do a special hack: if the route that's already - * there was generated by the cloning mechanism - * then we just blow it away and retry the insertion - * of the new one. + * there was generated by the protocol-cloning + * mechanism, then we just blow it away and retry + * the insertion of the new one. 
*/ - rt2 = rtalloc1(dst, 0, 0); + rt2 = rtalloc1(dst, 0, RTF_PRCLONING); if (rt2 && rt2->rt_parent) { rtrequest(RTM_DELETE, rt_key(rt2), @@ -724,7 +724,7 @@ ("no route to clone from")); rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */ rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */ - if ((*ret_nrt)->rt_flags & RTF_CLONING) { + if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) { /* * NB: We do not bump the refcnt on the parent * entry under the assumption that it will @@ -800,7 +800,7 @@ struct rtentry *rt0 = vp; if (rt->rt_parent == rt0 && - !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) { + !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) { return rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); @@ -841,7 +841,7 @@ #endif if (!rt->rt_parent || - (rt->rt_flags & (RTF_PINNED | RTF_CLONING))) { + (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) { #ifdef DEBUG if(rtfcdebug) printf("no parent, pinned or cloning\n"); #endif @@ -992,10 +992,9 @@ * correct choice anyway), and avoid the resulting reference loops * by disallowing any route to run through itself as a gateway. * This is obviously mandatory when we get rt->rt_output(). - * XXX: After removal of PRCLONING this probably not needed anymore. */ if (rt->rt_flags & RTF_GATEWAY) { - rt->rt_gwroute = rtalloc1(gate, 1, 0); + rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING); if (rt->rt_gwroute == rt) { RTFREE_LOCKED(rt->rt_gwroute); rt->rt_gwroute = 0; ==== //depot/projects/netperf/sys/net/route.h#8 (text+ko) ==== @@ -58,12 +58,6 @@ * These numbers are used by reliable protocols for determining * retransmission behavior and are included in the routing structure. */ -struct rt_metrics_lite { - u_long rmx_mtu; /* MTU for this path */ - u_long rmx_expire; /* lifetime for route, e.g. 
redirect */ - u_long rmx_pksent; /* packets sent using this route */ -}; - struct rt_metrics { u_long rmx_locks; /* Kernel must leave these values alone */ u_long rmx_mtu; /* MTU for this path */ @@ -110,10 +104,10 @@ long rt_refcnt; /* # held references */ u_long rt_flags; /* up/down?, host/net */ struct ifnet *rt_ifp; /* the answer: interface to use */ - struct ifaddr *rt_ifa; /* the answer: interface address to use */ + struct ifaddr *rt_ifa; /* the answer: interface to use */ struct sockaddr *rt_genmask; /* for generation of cloned routes */ caddr_t rt_llinfo; /* pointer to link level info cache */ - struct rt_metrics_lite rt_rmx; /* metrics used by rx'ing protocols */ + struct rt_metrics rt_rmx; /* metrics used by rx'ing protocols */ struct rtentry *rt_gwroute; /* implied entry for gatewayed routes */ int (*rt_output)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); @@ -157,7 +151,7 @@ #define RTF_PROTO2 0x4000 /* protocol specific routing flag */ #define RTF_PROTO1 0x8000 /* protocol specific routing flag */ -/* 0x10000 unused */ +#define RTF_PRCLONING 0x10000 /* protocol requires cloning */ #define RTF_WASCLONED 0x20000 /* route generated through cloning */ #define RTF_PROTO3 0x40000 /* protocol specific routing flag */ /* 0x80000 unused */ ==== //depot/projects/netperf/sys/net/rtsock.c#7 (text+ko) ==== @@ -86,8 +86,7 @@ static int sysctl_dumpentry(struct radix_node *rn, void *vw); static int sysctl_iflist(int af, struct walkarg *w); static int route_output(struct mbuf *, struct socket *); -static void rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics_lite *); -static void rt_getmetrics(struct rt_metrics_lite *, struct rt_metrics *); +static void rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics *); static void rt_dispatch(struct mbuf *, struct sockaddr *); /* @@ -355,6 +354,9 @@ RT_LOCK(saved_nrt); rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx, &saved_nrt->rt_rmx); + saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); + 
saved_nrt->rt_rmx.rmx_locks |= + (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); saved_nrt->rt_refcnt--; saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK]; RT_UNLOCK(saved_nrt); @@ -425,7 +427,7 @@ (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, (struct walkarg *)0); rtm->rtm_flags = rt->rt_flags; - rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); + rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_addrs = info.rti_addrs; break; @@ -475,7 +477,9 @@ rt->rt_genmask = info.rti_info[RTAX_GENMASK]; /* FALLTHROUGH */ case RTM_LOCK: - /* We don't support locks anymore */ + rt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits); + rt->rt_rmx.rmx_locks |= + (rtm->rtm_inits & rtm->rtm_rmx.rmx_locks); break; } RT_UNLOCK(rt); @@ -537,28 +541,20 @@ } static void -rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics_lite *out) +rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics *out) { #define metric(f, e) if (which & (f)) out->e = in->e; - /* - * Only these are stored in the routing entry since introduction - * of tcp hostcache. The rest is ignored. - */ + metric(RTV_RPIPE, rmx_recvpipe); + metric(RTV_SPIPE, rmx_sendpipe); + metric(RTV_SSTHRESH, rmx_ssthresh); + metric(RTV_RTT, rmx_rtt); + metric(RTV_RTTVAR, rmx_rttvar); + metric(RTV_HOPCOUNT, rmx_hopcount); metric(RTV_MTU, rmx_mtu); metric(RTV_EXPIRE, rmx_expire); #undef metric } -static void -rt_getmetrics(struct rt_metrics_lite *in, struct rt_metrics *out) -{ -#define metric(e) out->e = in->e; - bzero(out, sizeof(*out)); - metric(rmx_mtu); - metric(rmx_expire); -#undef metric -} - #define ROUNDUP(a) \ ((a) > 0 ? 
(1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) @@ -948,8 +944,8 @@ struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem; rtm->rtm_flags = rt->rt_flags; - rtm->rtm_use = rt->rt_rmx.rmx_pksent; - rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx); + rtm->rtm_use = rt->rt_use; + rtm->rtm_rmx = rt->rt_rmx; rtm->rtm_index = rt->rt_ifp->if_index; rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0; rtm->rtm_addrs = info.rti_addrs; ==== //depot/projects/netperf/sys/netatalk/ddp_output.c#4 (text+ko) ==== @@ -217,7 +217,7 @@ elh->el_type = ELAP_DDPEXTEND; elh->el_dnode = gate.sat_addr.s_node; } - ro->ro_rt->rt_rmx.rmx_pksent++; + ro->ro_rt->rt_use++; #ifdef NETATALK_DEBUG printf ("ddp_route: from %d.%d to %d.%d, via %d.%d (%s%d)\n", ==== //depot/projects/netperf/sys/netinet/icmp_var.h#3 (text+ko) ==== @@ -81,12 +81,11 @@ extern int badport_bandlim(int); #define BANDLIM_UNLIMITED -1 #define BANDLIM_ICMP_UNREACH 0 -#define BANDLIM_ICMP_UNREACH_HOST 1 -#define BANDLIM_ICMP_ECHO 2 -#define BANDLIM_ICMP_TSTAMP 3 -#define BANDLIM_RST_CLOSEDPORT 4 /* No connection, and no listeners */ -#define BANDLIM_RST_OPENPORT 5 /* No connection, listener */ -#define BANDLIM_MAX 5 +#define BANDLIM_ICMP_ECHO 1 +#define BANDLIM_ICMP_TSTAMP 2 +#define BANDLIM_RST_CLOSEDPORT 3 /* No connection, and no listeners */ +#define BANDLIM_RST_OPENPORT 4 /* No connection, listener */ +#define BANDLIM_MAX 4 #endif #endif ==== //depot/projects/netperf/sys/netinet/in_pcb.c#7 (text+ko) ==== @@ -536,6 +536,7 @@ if (error) return (error); } + if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, @@ -553,8 +554,7 @@ &in_ifaddrhead)->ia_broadaddr)->sin_addr; } if (laddr.s_addr == INADDR_ANY) { - struct route *ro; - struct route sro; + register struct route *ro; ia = (struct in_ifaddr *)0; /* @@ -563,10 +563,19 @@ * Note that we should check the address family of the cached * destination, in case of sharing the cache with IPv6. 
*/ - ro = &sro; - bzero(ro, sizeof(*ro)); - if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) { - /* Find out route to destination */ + ro = &inp->inp_route; + if (ro->ro_rt && + (ro->ro_dst.sa_family != AF_INET || + satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr || + inp->inp_socket->so_options & SO_DONTROUTE)) { + RTFREE(ro->ro_rt); + ro->ro_rt = (struct rtentry *)0; + } + if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ + (ro->ro_rt == (struct rtentry *)0 || + ro->ro_rt->rt_ifp == (struct ifnet *)0)) { + /* No route yet, so try to acquire one */ + bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr; @@ -580,8 +589,6 @@ */ if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) ia = ifatoia(ro->ro_rt->rt_ifa); - if (ro->ro_rt) - RTFREE(ro->ro_rt); if (ia == 0) { bzero(&sa, sizeof(sa)); sa.sin_addr = faddr; @@ -668,6 +675,8 @@ } if (inp->inp_options) (void)m_free(inp->inp_options); + if (inp->inp_route.ro_rt) + RTFREE(inp->inp_route.ro_rt); ip_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; INP_LOCK_DESTROY(inp); @@ -841,6 +850,60 @@ } /* + * Check for alternatives when higher level complains + * about service problems. For now, invalidate cached + * routing information. If the route was created dynamically + * (by a redirect), time to try a default gateway again. + */ +void +in_losing(inp) + struct inpcb *inp; +{ + register struct rtentry *rt; + struct rt_addrinfo info; + + if ((rt = inp->inp_route.ro_rt)) { + RT_LOCK(rt); + inp->inp_route.ro_rt = NULL; + bzero((caddr_t)&info, sizeof(info)); + info.rti_flags = rt->rt_flags; + info.rti_info[RTAX_DST] = rt_key(rt); + info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; + info.rti_info[RTAX_NETMASK] = rt_mask(rt); + rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); + if (rt->rt_flags & RTF_DYNAMIC) { + RT_UNLOCK(rt); /* XXX refcnt? 
*/ + (void) rtrequest1(RTM_DELETE, &info, NULL); + } else + rtfree(rt); + /* + * A new route can be allocated + * the next time output is attempted. + */ + } +} + +/* + * After a routing change, flush old routing + * and allocate a (hopefully) better one. + */ +struct inpcb * +in_rtchange(inp, errno) + register struct inpcb *inp; + int errno; +{ + if (inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = 0; + /* + * A new route can be allocated the next time + * output is attempted. + */ + } + return inp; +} + +/* * Lookup a PCB based on the local address and port. */ struct inpcb * ==== //depot/projects/netperf/sys/netinet/in_pcb.h#8 (text+ko) ==== @@ -94,22 +94,31 @@ /* * XXX - * the defines for inc_* are hacks and should be changed to direct references + * At some point struct route should possibly change to: + * struct rtentry *rt + * struct in_endpoints *ie; */ struct in_conninfo { u_int8_t inc_flags; u_int8_t inc_len; u_int16_t inc_pad; /* XXX alignment for in_endpoints */ - /* protocol dependent part */ + /* protocol dependent part; cached route */ struct in_endpoints inc_ie; + union { + /* placeholder for routing entry */ + struct route inc4_route; + struct route_in6 inc6_route; + } inc_dependroute; }; #define inc_isipv6 inc_flags /* temp compatability */ #define inc_fport inc_ie.ie_fport #define inc_lport inc_ie.ie_lport #define inc_faddr inc_ie.ie_faddr #define inc_laddr inc_ie.ie_laddr +#define inc_route inc_dependroute.inc4_route #define inc6_faddr inc_ie.ie6_faddr #define inc6_laddr inc_ie.ie6_laddr +#define inc6_route inc_dependroute.inc6_route struct icmp6_filter; @@ -147,6 +156,7 @@ #define inp_lport inp_inc.inc_lport #define inp_faddr inp_inc.inc_faddr #define inp_laddr inp_inc.inc_laddr +#define inp_route inp_inc.inc_route #define inp_ip_tos inp_depend4.inp4_ip_tos #define inp_options inp_depend4.inp4_options #define inp_moptions inp_depend4.inp4_moptions @@ -172,6 +182,7 @@ #define in6p_faddr inp_inc.inc6_faddr #define 
in6p_laddr inp_inc.inc6_laddr +#define in6p_route inp_inc.inc6_route #define in6p_ip6_hlim inp_depend6.inp6_hlim #define in6p_hops inp_depend6.inp6_hops /* default hop limit */ #define in6p_ip6_nxt inp_ip_p @@ -316,6 +327,9 @@ extern int ipport_hilastauto; void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); +void in_losing(struct inpcb *); +struct inpcb * + in_rtchange(struct inpcb *, int); int in_pcballoc(struct socket *, struct inpcbinfo *, struct thread *); int in_pcbbind(struct inpcb *, struct sockaddr *, struct thread *); int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, ==== //depot/projects/netperf/sys/netinet/in_rmx.c#9 (text+ko) ==== @@ -73,6 +73,15 @@ struct radix_node *ret; /* + * For IP, all unicast non-host routes are automatically cloning. + */ + if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) + rt->rt_flags |= RTF_MULTICAST; + + if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) + rt->rt_flags |= RTF_PRCLONING; + + /* * A little bit of help for both IP output and input: * For host routes, we make sure that RTF_BROADCAST * is set for anything that looks like a broadcast address. @@ -85,7 +94,8 @@ * * We also mark routes to multicast addresses as such, because * it's easy to do and might be useful (but this is much more - * dubious since it's so easy to inspect the address). + * dubious since it's so easy to inspect the address). (This + * is done above.) */ if (rt->rt_flags & RTF_HOST) { if (in_broadcast(sin->sin_addr, rt->rt_ifp)) { @@ -95,10 +105,9 @@ rt->rt_flags |= RTF_LOCAL; } } - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) - rt->rt_flags |= RTF_MULTICAST; - if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp) + if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) && + rt->rt_ifp) rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; ret = rn_addroute(v_arg, n_arg, head, treenodes); @@ -109,7 +118,8 @@ * Find out if it is because of an * ARP entry and delete it if so. 
*/ - rt2 = rtalloc1((struct sockaddr *)sin, 0, RTF_CLONING); + rt2 = rtalloc1((struct sockaddr *)sin, 0, + RTF_CLONING | RTF_PRCLONING); if (rt2) { if (rt2->rt_flags & RTF_LLINFO && rt2->rt_flags & RTF_HOST && @@ -128,6 +138,14 @@ RTFREE_LOCKED(rt2); } } + + /* + * If the new route created successfully, and we are forwarding, + * flush any cached routes to avoid using a stale value. + */ + if (ret != NULL && ipforwarding) + ip_forward_cacheinval(); + return ret; } @@ -380,7 +398,7 @@ * so that behavior is not needed there. */ RT_LOCK(rt); - rt->rt_flags &= ~RTF_CLONING; + rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING); RT_UNLOCK(rt); err = rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0); ==== //depot/projects/netperf/sys/netinet/in_var.h#4 (text+ko) ==== @@ -230,7 +230,9 @@ void ip_input(struct mbuf *); int in_ifadown(struct ifaddr *ifa, int); void in_ifscrub(struct ifnet *, struct in_ifaddr *); -int ip_fastforward(struct mbuf *); +int ipflow_fastforward(struct mbuf *); +void ipflow_create(const struct route *, struct mbuf *); +void ipflow_slowtimo(void); #endif /* _KERNEL */ ==== //depot/projects/netperf/sys/netinet/ip_divert.c#8 (text+ko) ==== @@ -333,7 +333,7 @@ /* Send packet to output processing */ ipstat.ips_rawout++; /* XXX */ error = ip_output((struct mbuf *)&divert_tag, - inp->inp_options, NULL, + inp->inp_options, &inp->inp_route, (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST | IP_RAWOUTPUT, inp->inp_moptions, NULL); ==== //depot/projects/netperf/sys/netinet/ip_fw.h#3 (text+ko) ==== @@ -28,7 +28,6 @@ #ifndef _IPFW2_H #define _IPFW2_H #define IPFW2 1 - /* * The kernel representation of ipfw rules is made of a list of * 'instructions' (for all practical purposes equivalent to BPF ==== //depot/projects/netperf/sys/netinet/ip_fw2.c#12 (text+ko) ==== @@ -461,16 +461,13 @@ dst->sin_len = sizeof(*dst); dst->sin_addr = src; - rtalloc_ign(&ro, RTF_CLONING); + rtalloc_ign(&ro, RTF_CLONING|RTF_PRCLONING); } - 
if (ro.ro_rt == NULL) + if ((ro.ro_rt == NULL) || (ifp == NULL) || + (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) return 0; - if ((ifp == NULL) || (ro.ro_rt->rt_ifp->if_index != ifp->if_index)) { - RTFREE(ro.ro_rt); - return 0; - } - RTFREE(ro.ro_rt); + return 1; } @@ -1162,6 +1159,7 @@ struct mbuf *m; struct ip *ip; struct tcphdr *tcp; + struct route sro; /* fake route */ MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == 0) @@ -1227,8 +1225,12 @@ */ ip->ip_ttl = ip_defttl; ip->ip_len = m->m_pkthdr.len; + bzero (&sro, sizeof (sro)); + ip_rtaddr(ip->ip_dst, &sro); m->m_flags |= M_SKIP_FIREWALL; - ip_output(m, NULL, NULL, 0, NULL, NULL); + ip_output(m, NULL, &sro, 0, NULL, NULL); + if (sro.ro_rt) + RTFREE(sro.ro_rt); } /* ==== //depot/projects/netperf/sys/netinet/ip_icmp.c#7 (text+ko) ==== @@ -52,15 +52,11 @@ #include <net/route.h> #include <netinet/in.h> -#include <netinet/in_pcb.h> #include <netinet/in_systm.h> #include <netinet/in_var.h> #include <netinet/ip.h> #include <netinet/ip_icmp.h> #include <netinet/ip_var.h> -#include <netinet/tcp.h> -#include <netinet/tcp_var.h> -#include <netinet/tcpip.h> #include <netinet/icmp_var.h> #ifdef IPSEC @@ -124,7 +120,7 @@ #endif static void icmp_reflect(struct mbuf *); -static void icmp_send(struct mbuf *, struct mbuf *); +static void icmp_send(struct mbuf *, struct mbuf *, struct route *); static int ip_next_mtu(int, int); extern struct protosw inetsw[]; @@ -169,18 +165,6 @@ if (n->m_flags & (M_BCAST|M_MCAST)) goto freeit; /* - * Limit sending of ICMP host unreachable messages. - * If we are acting as a router and someone is doing a sweep - * scan (eg. nmap and/or numerous windows worms) for destinations - * we are the gateway for but are not reachable (ie. a /24 on a - * interface and only a couple of hosts on the ethernet) we would - * generate a storm of ICMP host unreachable messages. 
- */ - if (type == ICMP_UNREACH && code == ICMP_UNREACH_HOST) { - if (badport_bandlim(BANDLIM_ICMP_UNREACH_HOST) < 0) - goto freeit; - } - /* * First, formulate icmp message */ m = m_gethdr(M_DONTWAIT, MT_HEADER); @@ -249,34 +233,27 @@ m_freem(n); } +static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET }; +static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET }; +static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET }; + /* * Process a received ICMP message. */ void icmp_input(m, off) - struct mbuf *m; + register struct mbuf *m; int off; { int hlen = off; - struct icmp *icp; + register struct icmp *icp; + register struct ip *ip = mtod(m, struct ip *); + int icmplen = ip->ip_len; + register int i; struct in_ifaddr *ia; - struct ip *ip = mtod(m, struct ip *); - int icmplen = ip->ip_len; - int i, code; void (*ctlfunc)(int, struct sockaddr *, void *); - struct sockaddr_in icmpsrc, icmpdst, icmpgw; + int code; - /* Initialize */ - bzero(&icmpsrc, sizeof(icmpsrc)); - icmpsrc.sin_len = sizeof(struct sockaddr_in); - icmpsrc.sin_family = AF_INET; - bzero(&icmpdst, sizeof(icmpdst)); - icmpdst.sin_len = sizeof(struct sockaddr_in); - icmpdst.sin_family = AF_INET; - bzero(&icmpgw, sizeof(icmpgw)); - icmpgw.sin_len = sizeof(struct sockaddr_in); - icmpgw.sin_family = AF_INET; - /* * Locate icmp structure in mbuf, and check * that not corrupted and of at least minimum length. @@ -410,7 +387,7 @@ printf("deliver to protocol %d\n", icp->icmp_ip.ip_p); #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; - +#if 1 /* * MTU discovery: * If we got a needfrag and there is a host route to the @@ -420,38 +397,40 @@ * notice that the MTU has changed and adapt accordingly. * If no new MTU was suggested, then we guess a new one * less than the current value. If the new MTU is - * unreasonably small (defined by sysctl tcp_minmss), then - * we don't update the MTU value. 
- * - * XXX: All this should be done in tcp_mtudisc() because - * the way we do it now, everyone can send us bogus ICMP - * MSGSIZE packets for any destination. By doing this far - * higher in the chain we have a matching tcp connection. - * Thus spoofing is much harder. However there is no easy - * non-hackish way to pass the new MTU up to tcp_mtudisc(). - * Also see next XXX regarding IPv4 AH TCP. + * unreasonably small (arbitrarily set at 296), then + * we reset the MTU to the interface value and enable the + * lock bit, indicating that we are no longer doing MTU + * discovery. */ if (code == PRC_MSGSIZE) { + struct rtentry *rt; int mtu; - struct in_conninfo inc; - bzero(&inc, sizeof(inc)); - inc.inc_flags = 0; /* IPv4 */ - inc.inc_faddr = icmpsrc.sin_addr; - - mtu = ntohs(icp->icmp_nextmtu); - if (!mtu) - mtu = ip_next_mtu(mtu, 1); - - if (mtu >= max(296, (tcp_minmss + sizeof(struct tcpiphdr)))) - tcp_hc_updatemtu(&inc, mtu); - + rt = rtalloc1((struct sockaddr *)&icmpsrc, 0, + RTF_CLONING | RTF_PRCLONING); + if (rt && (rt->rt_flags & RTF_HOST) + && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { + mtu = ntohs(icp->icmp_nextmtu); + if (!mtu) + mtu = ip_next_mtu(rt->rt_rmx.rmx_mtu, + 1); #ifdef DEBUG_MTUDISC - printf("MTU for %s reduced to %d\n", - inet_ntoa(icmpsrc.sin_addr), mtu); + printf("MTU for %s reduced to %d\n", + inet_ntoa(icmpsrc.sin_addr), mtu); #endif + if (mtu < 296) { + /* rt->rt_rmx.rmx_mtu = + rt->rt_ifp->if_mtu; */ + rt->rt_rmx.rmx_locks |= RTV_MTU; + } else if (rt->rt_rmx.rmx_mtu > mtu) { + rt->rt_rmx.rmx_mtu = mtu; + } + } + if (rt) + rtfree(rt); } +#endif /* * XXX if the packet contains [IPv4 AH TCP], we can't make a * notification to TCP layer. 
@@ -628,6 +607,7 @@ struct in_addr t; struct mbuf *opts = 0; int optlen = (ip->ip_hl << 2) - sizeof(struct ip); + struct route *ro = NULL, rt; if (!in_canforward(ip->ip_src) && ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) != @@ -638,6 +618,8 @@ } t = ip->ip_dst; ip->ip_dst = ip->ip_src; + ro = &rt; + bzero(ro, sizeof(*ro)); /* * If the incoming packet was addressed directly to us, * use dst as the src for the reply. Otherwise (broadcast @@ -658,7 +640,7 @@ goto match; } } - ia = ip_rtaddr(ip->ip_dst); + ia = ip_rtaddr(ip->ip_dst, ro); /* We need a route to do anything useful. */ if (ia == NULL) { m_freem(m); >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200310240252.h9O2qF2g012941>