Date: Tue, 18 Aug 2009 20:39:35 +0000 (UTC) From: Kip Macy <kmacy@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org Subject: svn commit: r196369 - in stable/8/sys: net netinet Message-ID: <200908182039.n7IKdZIN027859@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kmacy Date: Tue Aug 18 20:39:35 2009 New Revision: 196369 URL: http://svn.freebsd.org/changeset/base/196369 Log: MFC 196368 - change the interface to flowtable_lookup so that we don't rely on the mbuf for obtaining the fib index - check that a cached flow corresponds to the same fib index as the packet for which we are doing the lookup - at interface detach time flush any flows referencing stale rtentrys associated with the interface that is going away (fixes reported panics) - reduce the time between cleans; in case the cleaner is running at the time the eventhandler is called and the wakeup is missed, less time will elapse before the eventhandler returns - separate per-vnet initialization from global initialization (pointed out by jeli@) Reviewed by: sam@ Approved by: re@ Modified: stable/8/sys/net/flowtable.c stable/8/sys/net/flowtable.h stable/8/sys/netinet/ip_output.c Modified: stable/8/sys/net/flowtable.c ============================================================================== --- stable/8/sys/net/flowtable.c Tue Aug 18 20:28:58 2009 (r196368) +++ stable/8/sys/net/flowtable.c Tue Aug 18 20:39:35 2009 (r196369) @@ -29,6 +29,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "opt_route.h" #include "opt_mpath.h" +#include "opt_ddb.h" #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -36,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/types.h> #include <sys/bitstring.h> +#include <sys/condvar.h> #include <sys/callout.h> #include <sys/kernel.h> #include <sys/kthread.h> @@ -66,6 +68,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/sctp.h> #include <libkern/jenkins.h> +#include <ddb/ddb.h> struct ipv4_tuple { uint16_t ip_sport; /* source port */ @@ -94,8 +97,9 @@ union ipv6_flow { struct flentry { volatile uint32_t f_fhash; /* hash flowing forward */ uint16_t f_flags; /* flow flags */ - uint8_t f_pad; /* alignment */ + uint8_t f_pad; uint8_t f_proto; /* protocol */ + uint32_t f_fibnum; /* fib index */ uint32_t f_uptime; /* uptime at last access */ struct flentry *f_next; /* pointer to collision entry */ volatile struct rtentry *f_rt; /* rtentry for flow */ @@ -173,6 +177,10 @@ static VNET_DEFINE(uma_zone_t, flow_ipv6 #define V_flow_ipv4_zone VNET(flow_ipv4_zone) #define V_flow_ipv6_zone VNET(flow_ipv6_zone) +static struct cv flowclean_cv; +static struct mtx flowclean_lock; +static uint32_t flowclean_cycles; + /* * TODO: * - Make flowtable stats per-cpu, aggregated at sysctl call time, @@ -288,10 +296,10 @@ SYSCTL_VNET_PROC(_net_inet_flowtable, OI #ifndef RADIX_MPATH static void -in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fib) +in_rtalloc_ign_wrapper(struct route *ro, uint32_t hash, u_int fibnum) { - rtalloc_ign_fib(ro, 0, fib); + rtalloc_ign_fib(ro, 0, fibnum); } #endif @@ -425,7 +433,7 @@ static bitstr_t * flowtable_mask(struct flowtable *ft) { bitstr_t *mask; - + if (ft->ft_flags & FL_PCPU) mask = ft->ft_masks[curcpu]; else @@ -501,7 +509,7 @@ flowtable_set_hashkey(struct flentry *fl static int flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key, - uint8_t proto, struct route *ro, uint16_t flags) + uint8_t proto, uint32_t fibnum, struct route *ro, uint16_t flags) { struct 
flentry *fle, *fletail, *newfle, **flep; int depth; @@ -564,6 +572,7 @@ skip: fle->f_rt = ro->ro_rt; fle->f_lle = ro->ro_lle; fle->f_fhash = hash; + fle->f_fibnum = fibnum; fle->f_uptime = time_uptime; FL_ENTRY_UNLOCK(ft, hash); return (0); @@ -591,13 +600,13 @@ flowtable_key_equal(struct flentry *fle, } int -flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro) +flowtable_lookup(struct flowtable *ft, struct mbuf *m, struct route *ro, uint32_t fibnum) { uint32_t key[9], hash; struct flentry *fle; uint16_t flags; uint8_t proto = 0; - int error = 0, fib = 0; + int error = 0; struct rtentry *rt; struct llentry *lle; @@ -640,6 +649,7 @@ keycheck: && fle->f_fhash == hash && flowtable_key_equal(fle, key) && (proto == fle->f_proto) + && (fibnum == fle->f_fibnum) && (rt->rt_flags & RTF_UP) && (rt->rt_ifp != NULL)) { V_flowtable_hits++; @@ -668,10 +678,8 @@ uncached: * of arpresolve with an rt_check variant that expected to * receive the route locked */ - if (m != NULL) - fib = M_GETFIB(m); - ft->ft_rtalloc(ro, hash, fib); + ft->ft_rtalloc(ro, hash, fibnum); if (ro->ro_rt == NULL) error = ENETUNREACH; else { @@ -692,7 +700,7 @@ uncached: ro->ro_rt = NULL; return (ENOENT); } - error = flowtable_insert(ft, hash, key, proto, + error = flowtable_insert(ft, hash, key, proto, fibnum, ro, flags); if (error) { @@ -791,35 +799,6 @@ flowtable_alloc(int nentry, int flags) return (ft); } -static void -flowtable_init(const void *unused __unused) -{ - - V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4), - NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET); - V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6), - NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET); - uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows); - uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows); - V_flowtable_ready = 1; -} - -VNET_SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, - flowtable_init, NULL); - -#ifdef VIMAGE -static void 
-flowtable_uninit(const void *unused __unused) -{ - - uma_zdestroy(V_flow_ipv4_zone); - uma_zdestroy(V_flow_ipv6_zone); -} - -VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, - flowtable_uninit, NULL); -#endif - /* * The rest of the code is devoted to garbage collection of expired entries. * It is a new additon made necessary by the switch to dynamically allocating @@ -973,12 +952,30 @@ flowtable_cleaner(void) } VNET_LIST_RUNLOCK(); + flowclean_cycles++; /* * The 20 second interval between cleaning checks * is arbitrary */ - pause("flowcleanwait", 20*hz); + mtx_lock(&flowclean_lock); + cv_broadcast(&flowclean_cv); + cv_timedwait(&flowclean_cv, &flowclean_lock, 10*hz); + mtx_unlock(&flowclean_lock); + } +} + +static void +flowtable_flush(void *unused __unused) +{ + uint64_t start; + + mtx_lock(&flowclean_lock); + start = flowclean_cycles; + while (start == flowclean_cycles) { + cv_broadcast(&flowclean_cv); + cv_wait(&flowclean_cv, &flowclean_lock); } + mtx_unlock(&flowclean_lock); } static struct kproc_desc flow_kp = { @@ -988,3 +985,159 @@ static struct kproc_desc flow_kp = { }; SYSINIT(flowcleaner, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &flow_kp); +static void +flowtable_init_vnet(const void *unused __unused) +{ + + V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4), + NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET); + V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6), + NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET); + uma_zone_set_max(V_flow_ipv4_zone, V_flowtable_nmbflows); + uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows); +} +VNET_SYSINIT(flowtable_init_vnet, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, + flowtable_init_vnet, NULL); + +static void +flowtable_init(const void *unused __unused) +{ + + cv_init(&flowclean_cv, "flowcleanwait"); + mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF); + EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL, + EVENTHANDLER_PRI_ANY); + 
V_flowtable_ready = 1; +} +SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, + flowtable_init, NULL); + + +#ifdef VIMAGE +static void +flowtable_uninit(const void *unused __unused) +{ + + uma_zdestroy(V_flow_ipv4_zone); + uma_zdestroy(V_flow_ipv6_zone); +} + +VNET_SYSUNINIT(flowtable_uninit, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, + flowtable_uninit, NULL); +#endif + +#ifdef DDB +static bitstr_t * +flowtable_mask_pcpu(struct flowtable *ft, int cpuid) +{ + bitstr_t *mask; + + if (ft->ft_flags & FL_PCPU) + mask = ft->ft_masks[cpuid]; + else + mask = ft->ft_masks[0]; + + return (mask); +} + +static struct flentry ** +flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid) +{ + struct flentry **fle; + int index = (hash % ft->ft_size); + + if (ft->ft_flags & FL_PCPU) { + fle = &ft->ft_table.pcpu[cpuid][index]; + } else { + fle = &ft->ft_table.global[index]; + } + + return (fle); +} + +static void +flow_show(struct flowtable *ft, struct flentry *fle) +{ + int idle_time; + int rt_valid; + + idle_time = (int)(time_uptime - fle->f_uptime); + rt_valid = fle->f_rt != NULL; + db_printf("hash=0x%08x idle_time=%03d rt=%p ifp=%p", + fle->f_fhash, idle_time, + fle->f_rt, rt_valid ? 
fle->f_rt->rt_ifp : NULL); + if (rt_valid && (fle->f_rt->rt_flags & RTF_UP)) + db_printf(" RTF_UP "); + if (fle->f_flags & FL_STALE) + db_printf(" FL_STALE "); + db_printf("\n"); +} + +static void +flowtable_show(struct flowtable *ft, int cpuid) +{ + int curbit = 0; + struct flentry *fle, **flehead; + bitstr_t *mask, *tmpmask; + + db_printf("cpu: %d\n", cpuid); + mask = flowtable_mask_pcpu(ft, cpuid); + tmpmask = ft->ft_tmpmask; + memcpy(tmpmask, mask, ft->ft_size/8); + /* + * XXX Note to self, bit_ffs operates at the byte level + * and thus adds gratuitous overhead + */ + bit_ffs(tmpmask, ft->ft_size, &curbit); + while (curbit != -1) { + if (curbit >= ft->ft_size || curbit < -1) { + db_printf("warning: bad curbit value %d \n", + curbit); + break; + } + + flehead = flowtable_entry_pcpu(ft, curbit, cpuid); + fle = *flehead; + + while (fle != NULL) { + flow_show(ft, fle); + fle = fle->f_next; + continue; + } + bit_clear(tmpmask, curbit); + bit_ffs(tmpmask, ft->ft_size, &curbit); + } +} + +static void +flowtable_show_vnet(void) +{ + struct flowtable *ft; + int i; + + ft = V_flow_list_head; + while (ft != NULL) { + if (ft->ft_flags & FL_PCPU) { + for (i = 0; i <= mp_maxid; i++) { + if (CPU_ABSENT(i)) + continue; + flowtable_show(ft, i); + } + } else { + flowtable_show(ft, 0); + } + ft = ft->ft_next; + } +} + +DB_SHOW_COMMAND(flowtables, db_show_flowtables) +{ + VNET_ITERATOR_DECL(vnet_iter); + + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + flowtable_show_vnet(); + CURVNET_RESTORE(); + } +} +#endif Modified: stable/8/sys/net/flowtable.h ============================================================================== --- stable/8/sys/net/flowtable.h Tue Aug 18 20:28:58 2009 (r196368) +++ stable/8/sys/net/flowtable.h Tue Aug 18 20:39:35 2009 (r196369) @@ -49,7 +49,7 @@ struct flowtable *flowtable_alloc(int ne * */ int flowtable_lookup(struct flowtable *ft, struct mbuf *m, - struct route *ro); + struct route *ro, uint32_t fibnum); #endif /* _KERNEL */ #endif 
Modified: stable/8/sys/netinet/ip_output.c ============================================================================== --- stable/8/sys/netinet/ip_output.c Tue Aug 18 20:28:58 2009 (r196368) +++ stable/8/sys/netinet/ip_output.c Tue Aug 18 20:39:35 2009 (r196369) @@ -157,7 +157,7 @@ ip_output(struct mbuf *m, struct mbuf *o * longer than that long for the stability of ro_rt. The * flow ID assignment must have happened before this point. */ - if (flowtable_lookup(V_ip_ft, m, ro) == 0) + if (flowtable_lookup(V_ip_ft, m, ro, M_GETFIB(m)) == 0) nortfree = 1; #endif }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200908182039.n7IKdZIN027859>