From owner-svn-src-user@FreeBSD.ORG Sat Jan 10 02:32:56 2009 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 938EE106566B; Sat, 10 Jan 2009 02:32:56 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 81EDC8FC17; Sat, 10 Jan 2009 02:32:56 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n0A2WuUO076918; Sat, 10 Jan 2009 02:32:56 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n0A2WuUX076916; Sat, 10 Jan 2009 02:32:56 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <200901100232.n0A2WuUX076916@svn.freebsd.org> From: Kip Macy Date: Sat, 10 Jan 2009 02:32:56 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r186994 - user/kmacy/HEAD_fast_net/sys/net X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 10 Jan 2009 02:32:57 -0000 Author: kmacy Date: Sat Jan 10 02:32:56 2009 New Revision: 186994 URL: http://svn.freebsd.org/changeset/base/186994 Log: - add kernel support for "sticky" routes - all connections from a given source ip will be routed to the same dst ip Modified: user/kmacy/HEAD_fast_net/sys/net/flowtable.c user/kmacy/HEAD_fast_net/sys/net/route.h Modified: user/kmacy/HEAD_fast_net/sys/net/flowtable.c ============================================================================== --- user/kmacy/HEAD_fast_net/sys/net/flowtable.c Sat Jan 10 01:47:15 2009 (r186993) +++ user/kmacy/HEAD_fast_net/sys/net/flowtable.c Sat Jan 10 02:32:56 2009 (r186994) @@ -330,13 +330,14 @@ flowtable_pcpu_unlock(struct flowtable * static uint32_t ipv4_flow_lookup_hash_internal(struct mbuf *m, struct route *ro, - uint32_t *key, uint16_t *flags, uint8_t *protop) + uint32_t *key, uint16_t *flags, uint8_t *protop, uint32_t *hash, + uint32_t *hash_noports) { uint16_t sport = 0, dport = 0; struct ip *ip; uint8_t proto = 0; int iphlen; - uint32_t hash; + uint32_t rh; struct sockaddr_in *sin; struct tcphdr *th; struct udphdr *uh; @@ -352,14 +353,16 @@ ipv4_flow_lookup_hash_internal(struct mb key[1] = 0; key[2] = sin->sin_addr.s_addr; - if (m == NULL || (*flags & FL_HASH_PORTS) == 0) + if (m == NULL) goto skipports; - ip = mtod(m, struct ip *); proto = ip->ip_p; iphlen = ip->ip_hl << 2; /* XXX options? */ key[1] = ip->ip_src.s_addr; - + + if ((*flags & FL_HASH_PORTS) == 0) + goto skipports; + switch (proto) { case IPPROTO_TCP: th = (struct tcphdr *)((caddr_t)ip + iphlen); @@ -386,30 +389,27 @@ ipv4_flow_lookup_hash_internal(struct mb break;; } - *protop = proto; - - /* - * If this is a transmit route cache then - * hash all flows to a given destination to - * the same bucket - */ - if ((*flags & FL_HASH_PORTS) == 0) - proto = sport = dport = 0; - - ((uint16_t *)key)[0] = sport; - ((uint16_t *)key)[1] = dport; skipports: - hash = hashword(key, 3, hashjitter + proto); + rh = hashword(key, 3, hashjitter + proto); + *hash_noports = rh; + *hash = 0; + if ((*flags & FL_HASH_PORTS) && sport) { + ((uint16_t *)key)[0] = sport; + ((uint16_t *)key)[1] = dport; + rh = hashword(key, 3, hashjitter + proto); + *hash = rh; + } if (m != NULL && (m->m_flags & M_FLOWID) == 0) - m->m_pkthdr.flowid = hash; - - CTR5(KTR_SPARE3, "proto=%d hash=%x key[0]=%x sport=%d dport=%d\n", proto, hash, key[0], sport, dport); - - return (hash); + m->m_pkthdr.flowid = rh; + + CTR5(KTR_SPARE3, "proto=%d hash=%x key[0]=%x sport=%d dport=%d\n", + proto, *hash, key[0], sport, dport); + + return (0); noop: *protop = proto; - return (0); + return (ENOENT); } static bitstr_t * @@ -566,7 +566,7 @@ flowtable_key_equal(struct flentry *fle, int flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro) { - uint32_t key[9], hash; + uint32_t key[9], hash, hash_noports; struct flentry *fle; uint16_t flags; uint8_t proto = 0; @@ -577,13 +577,14 @@ flowtable_lookup(struct flowtable *ft, s flags = ft ? ft->ft_flags : 0; ro->ro_rt = NULL; ro->ro_lle = NULL; - + hash = hash_noports = 0; + /* * The internal hash lookup is the only IPv4 specific bit * remaining */ - hash = ipv4_flow_lookup_hash_internal(m, ro, key, - &flags, &proto); + error = ipv4_flow_lookup_hash_internal(m, ro, key, + &flags, &proto, &hash, &hash_noports); /* * Ports are zero and this isn't a transmit cache @@ -591,10 +592,13 @@ flowtable_lookup(struct flowtable *ft, s * statex * FL_HASH_PORTS => key[0] != 0 for TCP || UDP || SCTP */ - if (hash == 0 || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS))) { + if (error == ENOENT || (key[0] == 0 && (ft->ft_flags & FL_HASH_PORTS))) { cache = 0; goto uncached; } + if ((ft->ft_flags & FL_HASH_PORTS) == 0) + goto skipports; + FL_ENTRY_LOCK(ft, hash); fle = FL_ENTRY(ft, hash); rt = __DEVOLATILE(struct rtentry *, fle->f_rt); @@ -614,6 +618,27 @@ flowtable_lookup(struct flowtable *ft, s } FL_ENTRY_UNLOCK(ft, hash); +skipports: + key[0] = 0; + FL_ENTRY_LOCK(ft, hash_noports); + fle = FL_ENTRY(ft, hash_noports); + rt = __DEVOLATILE(struct rtentry *, fle->f_rt); + lle = __DEVOLATILE(struct llentry *, fle->f_lle); + if ((rt != NULL) + && fle->f_fhash == hash_noports + && flowtable_key_equal(fle, key, flags) + && (proto == fle->f_proto) + && (rt->rt_flags & RTF_UP) + && (rt->rt_ifp != NULL)) { + fle->f_uptime = time_uptime; + fle->f_flags |= flags; + ro->ro_rt = rt; + ro->ro_lle = lle; + FL_ENTRY_UNLOCK(ft, hash_noports); + return (0); + } + FL_ENTRY_UNLOCK(ft, hash_noports); + uncached: /* * This bit of code ends up locking the @@ -639,6 +664,18 @@ uncached: struct rtentry *rt = ro->ro_rt; struct ifnet *ifp = rt->rt_ifp; + if (rt->rt_flags & RTF_STICKY) { + RTFREE(rt); + hash = hash_noports; + ft->ft_rtalloc(ro, hash, fib); + if (ro->ro_rt == NULL) { + error = ENETUNREACH; + goto done; + } + rt = ro->ro_rt; + ifp = rt->rt_ifp; + } + if (rt->rt_flags & RTF_GATEWAY) l3addr = rt->rt_gateway; else @@ -670,7 +707,7 @@ uncached: } error = 0; } - +done: return (error); } @@ -719,7 +756,7 @@ flowtable_alloc(int nentry, int flags) ft->ft_masks[i] = bit_alloc(nentry); } } else { - ft->ft_lock_count = 2*(powerof2(mp_ncpus) ? mp_ncpus : + ft->ft_lock_count = 8*(powerof2(mp_ncpus) ? mp_ncpus : (fls(mp_ncpus) << 1)); ft->ft_lock = flowtable_global_lock; Modified: user/kmacy/HEAD_fast_net/sys/net/route.h ============================================================================== --- user/kmacy/HEAD_fast_net/sys/net/route.h Sat Jan 10 01:47:15 2009 (r186993) +++ user/kmacy/HEAD_fast_net/sys/net/route.h Sat Jan 10 02:32:56 2009 (r186994) @@ -196,9 +196,10 @@ struct ortentry { #define RTF_LOCAL 0x200000 /* route represents a local address */ #define RTF_BROADCAST 0x400000 /* route represents a bcast address */ #define RTF_MULTICAST 0x800000 /* route represents a mcast address */ - /* 0x1000000 and up unassigned */ + /* 0x8000000 and up unassigned */ +#define RTF_STICKY 0x10000000 /* always route dst->src */ #define RTF_SHUTDOWN 0x20000000 /* no new connections */ -#define RTF_RNH_LOCKED 0x40000000 +#define RTF_RNH_LOCKED 0x40000000 /* Mask of RTF flags that are allowed to be modified by RTM_CHANGE. */ #define RTF_FMASK \