From owner-svn-src-head@FreeBSD.ORG Mon Mar 22 23:04:13 2010 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 2E427106564A; Mon, 22 Mar 2010 23:04:13 +0000 (UTC) (envelope-from kmacy@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 1D2118FC0C; Mon, 22 Mar 2010 23:04:13 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o2MN4DCc067014; Mon, 22 Mar 2010 23:04:13 GMT (envelope-from kmacy@svn.freebsd.org) Received: (from kmacy@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o2MN4CYU067011; Mon, 22 Mar 2010 23:04:12 GMT (envelope-from kmacy@svn.freebsd.org) Message-Id: <201003222304.o2MN4CYU067011@svn.freebsd.org> From: Kip Macy Date: Mon, 22 Mar 2010 23:04:12 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r205488 - in head/sys: net netinet X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 22 Mar 2010 23:04:13 -0000 Author: kmacy Date: Mon Mar 22 23:04:12 2010 New Revision: 205488 URL: http://svn.freebsd.org/changeset/base/205488 Log: - boot-time size the ipv4 flowtable and the maximum number of flows - increase flow cleaning frequency and decrease flow caching time when near the flow limit - stop allocating new flows when within 3% of maxflows; don't start allocating again until the flow count falls more than 12.5% below maxflows MFC after: 7 days Modified: head/sys/net/flowtable.c head/sys/netinet/ip_input.c Modified: head/sys/net/flowtable.c 
============================================================================== --- head/sys/net/flowtable.c Mon Mar 22 22:39:32 2010 (r205487) +++ head/sys/net/flowtable.c Mon Mar 22 23:04:12 2010 (r205488) @@ -155,30 +155,33 @@ struct flowtable_stats { uint64_t ft_frees; uint64_t ft_hits; uint64_t ft_lookups; -} __aligned(128); +} __aligned(CACHE_LINE_SIZE); struct flowtable { struct flowtable_stats ft_stats[MAXCPU]; int ft_size; int ft_lock_count; uint32_t ft_flags; - - uint32_t ft_udp_idle; - uint32_t ft_fin_wait_idle; - uint32_t ft_syn_idle; - uint32_t ft_tcp_idle; - char *ft_name; fl_lock_t *ft_lock; fl_lock_t *ft_unlock; fl_rtalloc_t *ft_rtalloc; + /* + * XXX need to pad out + */ struct mtx *ft_locks; - union flentryp ft_table; bitstr_t *ft_masks[MAXCPU]; bitstr_t *ft_tmpmask; struct flowtable *ft_next; -} __aligned(128); + + uint32_t ft_count __aligned(CACHE_LINE_SIZE); + uint32_t ft_udp_idle __aligned(CACHE_LINE_SIZE); + uint32_t ft_fin_wait_idle; + uint32_t ft_syn_idle; + uint32_t ft_tcp_idle; + boolean_t ft_full; +} __aligned(CACHE_LINE_SIZE); static struct proc *flowcleanerproc; static VNET_DEFINE(struct flowtable *, flow_list_head); @@ -191,9 +194,11 @@ static VNET_DEFINE(uma_zone_t, flow_ipv6 #define V_flow_ipv4_zone VNET(flow_ipv4_zone) #define V_flow_ipv6_zone VNET(flow_ipv6_zone) + static struct cv flowclean_cv; static struct mtx flowclean_lock; static uint32_t flowclean_cycles; +static uint32_t flowclean_freq; #ifdef FLOWTABLE_DEBUG #define FLDPRINTF(ft, flags, fmt, ...) 
\ @@ -230,7 +235,7 @@ static VNET_DEFINE(int, flowtable_syn_ex static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE; static VNET_DEFINE(int, flowtable_fin_wait_expire) = FIN_WAIT_IDLE; static VNET_DEFINE(int, flowtable_tcp_expire) = TCP_IDLE; -static VNET_DEFINE(int, flowtable_nmbflows) = 4096; +static VNET_DEFINE(int, flowtable_nmbflows); static VNET_DEFINE(int, flowtable_ready) = 0; #define V_flowtable_enable VNET(flowtable_enable) @@ -905,6 +910,61 @@ flowtable_set_hashkey(struct flentry *fl hashkey[i] = key[i]; } +static struct flentry * +flow_alloc(struct flowtable *ft) +{ + struct flentry *newfle; + uma_zone_t zone; + + newfle = NULL; + zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone; + + newfle = uma_zalloc(zone, M_NOWAIT | M_ZERO); + if (newfle != NULL) + atomic_add_int(&ft->ft_count, 1); + return (newfle); +} + +static void +flow_free(struct flentry *fle, struct flowtable *ft) +{ + uma_zone_t zone; + + zone = (ft->ft_flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone; + atomic_add_int(&ft->ft_count, -1); + uma_zfree(zone, fle); +} + +static int +flow_full(struct flowtable *ft) +{ + boolean_t full; + uint32_t count; + + full = ft->ft_full; + count = ft->ft_count; + + if (full && (count < (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 3)))) + ft->ft_full = FALSE; + else if (!full && (count > (V_flowtable_nmbflows - (V_flowtable_nmbflows >> 5)))) + ft->ft_full = TRUE; + + if (full && !ft->ft_full) { + flowclean_freq = 4*hz; + if ((ft->ft_flags & FL_HASH_ALL) == 0) + ft->ft_udp_idle = ft->ft_fin_wait_idle = + ft->ft_syn_idle = ft->ft_tcp_idle = 5; + cv_broadcast(&flowclean_cv); + } else if (!full && ft->ft_full) { + flowclean_freq = 20*hz; + if ((ft->ft_flags & FL_HASH_ALL) == 0) + ft->ft_udp_idle = ft->ft_fin_wait_idle = + ft->ft_syn_idle = ft->ft_tcp_idle = 30; + } + + return (ft->ft_full); +} + static int flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key, uint32_t fibnum, struct route *ro, uint16_t flags) @@ 
-912,12 +972,10 @@ flowtable_insert(struct flowtable *ft, u struct flentry *fle, *fletail, *newfle, **flep; struct flowtable_stats *fs = &ft->ft_stats[curcpu]; int depth; - uma_zone_t flezone; bitstr_t *mask; uint8_t proto; - flezone = (flags & FL_IPV6) ? V_flow_ipv6_zone : V_flow_ipv4_zone; - newfle = uma_zalloc(flezone, M_NOWAIT | M_ZERO); + newfle = flow_alloc(ft); if (newfle == NULL) return (ENOMEM); @@ -948,9 +1006,8 @@ flowtable_insert(struct flowtable *ft, u * or we lost a race to insert */ FL_ENTRY_UNLOCK(ft, hash); - uma_zfree((newfle->f_flags & FL_IPV6) ? - V_flow_ipv6_zone : V_flow_ipv4_zone, newfle); - + flow_free(newfle, ft); + if (flags & FL_OVERWRITE) goto skip; return (EEXIST); @@ -1147,7 +1204,7 @@ keycheck: } FL_ENTRY_UNLOCK(ft, hash); uncached: - if (flags & FL_NOAUTO) + if (flags & FL_NOAUTO || flow_full(ft)) return (NULL); fs->ft_misses++; @@ -1325,7 +1382,7 @@ flowtable_alloc(char *name, int nentry, * */ static void -fle_free(struct flentry *fle) +fle_free(struct flentry *fle, struct flowtable *ft) { struct rtentry *rt; struct llentry *lle; @@ -1334,8 +1391,7 @@ fle_free(struct flentry *fle) lle = __DEVOLATILE(struct llentry *, fle->f_lle); RTFREE(rt); LLE_FREE(lle); - uma_zfree((fle->f_flags & FL_IPV6) ? 
- V_flow_ipv6_zone : V_flow_ipv4_zone, fle); + flow_free(fle, ft); } static void @@ -1426,7 +1482,7 @@ flowtable_free_stale(struct flowtable *f flefreehead = fle->f_next; count++; fs->ft_frees++; - fle_free(fle); + fle_free(fle, ft); } if (V_flowtable_debug && count) log(LOG_DEBUG, "freed %d flow entries\n", count); @@ -1518,7 +1574,7 @@ flowtable_cleaner(void) */ mtx_lock(&flowclean_lock); cv_broadcast(&flowclean_cv); - cv_timedwait(&flowclean_cv, &flowclean_lock, 10*hz); + cv_timedwait(&flowclean_cv, &flowclean_lock, flowclean_freq); mtx_unlock(&flowclean_lock); } } @@ -1548,6 +1604,7 @@ static void flowtable_init_vnet(const void *unused __unused) { + V_flowtable_nmbflows = 1024 + maxusers * 64 * mp_ncpus; V_flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4), NULL, NULL, NULL, NULL, 64, UMA_ZONE_MAXBUCKET); V_flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6), @@ -1556,7 +1613,7 @@ flowtable_init_vnet(const void *unused _ uma_zone_set_max(V_flow_ipv6_zone, V_flowtable_nmbflows); V_flowtable_ready = 1; } -VNET_SYSINIT(flowtable_init_vnet, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, +VNET_SYSINIT(flowtable_init_vnet, SI_SUB_SMP, SI_ORDER_ANY, flowtable_init_vnet, NULL); static void @@ -1567,8 +1624,9 @@ flowtable_init(const void *unused __unus mtx_init(&flowclean_lock, "flowclean lock", NULL, MTX_DEF); EVENTHANDLER_REGISTER(ifnet_departure_event, flowtable_flush, NULL, EVENTHANDLER_PRI_ANY); + flowclean_freq = 20*hz; } -SYSINIT(flowtable_init, SI_SUB_KTHREAD_INIT, SI_ORDER_ANY, +SYSINIT(flowtable_init, SI_SUB_SMP, SI_ORDER_MIDDLE, flowtable_init, NULL); Modified: head/sys/netinet/ip_input.c ============================================================================== --- head/sys/netinet/ip_input.c Mon Mar 22 22:39:32 2010 (r205487) +++ head/sys/netinet/ip_input.c Mon Mar 22 23:04:12 2010 (r205488) @@ -327,8 +327,20 @@ ip_init(void) "error %d\n", __func__, i); #ifdef FLOWTABLE - TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size", - 
&V_ip_output_flowtable_size); + if (TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size", + &V_ip_output_flowtable_size)) { + if (V_ip_output_flowtable_size < 256) + V_ip_output_flowtable_size = 256; + if (!powerof2(V_ip_output_flowtable_size)) { + printf("flowtable must be power of 2 size\n"); + V_ip_output_flowtable_size = 2048; + } + } else { + /* + * round up to the next power of 2 + */ + V_ip_output_flowtable_size = 1 << fls((1024 + maxusers * 64)-1); + } V_ip_ft = flowtable_alloc("ipv4", V_ip_output_flowtable_size, FL_PCPU); #endif