From owner-p4-projects@FreeBSD.ORG Thu Dec 7 16:54:43 2006 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id E09C916A512; Thu, 7 Dec 2006 16:54:42 +0000 (UTC) X-Original-To: perforce@FreeBSD.org Delivered-To: perforce@FreeBSD.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 8A86216A50A for ; Thu, 7 Dec 2006 16:54:42 +0000 (UTC) (envelope-from zec@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [69.147.83.41]) by mx1.FreeBSD.org (Postfix) with ESMTP id 5A85E43DCD for ; Thu, 7 Dec 2006 16:45:22 +0000 (GMT) (envelope-from zec@FreeBSD.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.6/8.13.6) with ESMTP id kB7Gk9m0002417 for ; Thu, 7 Dec 2006 16:46:10 GMT (envelope-from zec@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.6/8.13.4/Submit) id kB7Gk92U002414 for perforce@freebsd.org; Thu, 7 Dec 2006 16:46:09 GMT (envelope-from zec@FreeBSD.org) Date: Thu, 7 Dec 2006 16:46:09 GMT Message-Id: <200612071646.kB7Gk92U002414@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to zec@FreeBSD.org using -f From: Marko Zec To: Perforce Change Reviews Cc: Subject: PERFORCE change 111250 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 07 Dec 2006 16:54:43 -0000 http://perforce.freebsd.org/chv.cgi?CH=111250 Change 111250 by zec@zec_tca51 on 2006/12/07 16:45:41 Initial attempt at virtualizing tcp_hostcache. Affected files ... .. //depot/projects/vimage/src/sys/netinet/tcp_hostcache.c#3 edit .. //depot/projects/vimage/src/sys/netinet/tcp_hostcache.h#1 add .. //depot/projects/vimage/src/sys/netinet/tcp_subr.c#5 edit .. //depot/projects/vimage/src/sys/netinet/vinet.h#3 edit Differences ... ==== //depot/projects/vimage/src/sys/netinet/tcp_hostcache.c#3 (text+ko) ==== @@ -60,11 +60,6 @@ * memory constrains. */ -/* - * Many thanks to jlemon for basic structure of tcp_syncache which is being - * followed here. - */ - #include "opt_inet6.h" #include "opt_vimage.h" @@ -98,57 +93,14 @@ #ifdef INET6 #include #endif +#include #include -TAILQ_HEAD(hc_qhead, hc_metrics); - -struct hc_head { - struct hc_qhead hch_bucket; - u_int hch_length; - struct mtx hch_mtx; -}; - -struct hc_metrics { - /* housekeeping */ - TAILQ_ENTRY(hc_metrics) rmx_q; - struct hc_head *rmx_head; /* head of bucket tail queue */ - struct in_addr ip4; /* IP address */ - struct in6_addr ip6; /* IP6 address */ - /* endpoint specific values for tcp */ - u_long rmx_mtu; /* MTU for this path */ - u_long rmx_ssthresh; /* outbound gateway buffer limit */ - u_long rmx_rtt; /* estimated round trip time */ - u_long rmx_rttvar; /* estimated rtt variance */ - u_long rmx_bandwidth; /* estimated bandwidth */ - u_long rmx_cwnd; /* congestion window */ - u_long rmx_sendpipe; /* outbound delay-bandwidth product */ - u_long rmx_recvpipe; /* inbound delay-bandwidth product */ - /* tcp hostcache internal data */ - int rmx_expire; /* lifetime for object */ - u_long rmx_hits; /* number of hits */ - u_long rmx_updates; /* number of updates */ -}; - -/* Arbitrary values */ -#define TCP_HOSTCACHE_HASHSIZE 512 -#define TCP_HOSTCACHE_BUCKETLIMIT 30 -#define TCP_HOSTCACHE_EXPIRE 60*60 /* one hour */ -#define TCP_HOSTCACHE_PRUNE 5*60 /* every 5 minutes */ - -struct tcp_hostcache { - struct hc_head *hashbase; - uma_zone_t zone; - u_int hashsize; - u_int hashmask; - u_int bucket_limit; - u_int cache_count; - u_int cache_limit; - int expire; - int purgeall; -}; +#ifndef VIMAGE static struct tcp_hostcache tcp_hostcache; +#endif static struct callout tcp_hc_callout; @@ -157,25 +109,32 @@ static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS); static void tcp_hc_purge(void *); -SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, "TCP Host cache"); +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, + "TCP Host cache"); -SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, - &tcp_hostcache.cache_limit, 0, "Overall entry limit for hostcache"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, cachelimit, + CTLFLAG_RDTUN, tcp_hostcache.cache_limit, 0, + "Overall entry limit for hostcache"); -SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN, - &tcp_hostcache.hashsize, 0, "Size of TCP hostcache hashtable"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, hashsize, + CTLFLAG_RDTUN, tcp_hostcache.hashsize, 0, + "Size of TCP hostcache hashtable"); -SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN, - &tcp_hostcache.bucket_limit, 0, "Per-bucket hash limit for hostcache"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, bucketlimit, + CTLFLAG_RDTUN, tcp_hostcache.bucket_limit, 0, + "Per-bucket hash limit for hostcache"); -SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD, - &tcp_hostcache.cache_count, 0, "Current number of entries in hostcache"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, count, + CTLFLAG_RD, tcp_hostcache.cache_count, 0, + "Current number of entries in hostcache"); -SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW, - &tcp_hostcache.expire, 0, "Expire time of TCP hostcache entries"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, expire, + CTLFLAG_RW, tcp_hostcache.expire, 0, + "Expire time of TCP hostcache entries"); -SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW, - &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, purge, + CTLFLAG_RW, tcp_hostcache.purgeall, 0, + "Expire all entires on next purge run"); SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0, @@ -186,7 +145,7 @@ #define HOSTCACHE_HASH(ip) \ (((ip)->s_addr ^ ((ip)->s_addr >> 7) ^ ((ip)->s_addr >> 17)) & \ - tcp_hostcache.hashmask) + V_tcp_hostcache.hashmask) /* XXX: What is the recommended hash to get good entropy for IPv6 addresses? */ #define HOSTCACHE_HASH6(ip6) \ @@ -194,7 +153,7 @@ (ip6)->s6_addr32[1] ^ \ (ip6)->s6_addr32[2] ^ \ (ip6)->s6_addr32[3]) & \ - tcp_hostcache.hashmask) + V_tcp_hostcache.hashmask) #define THC_LOCK(lp) mtx_lock(lp) #define THC_UNLOCK(lp) mtx_unlock(lp) @@ -202,59 +161,64 @@ void tcp_hc_init(void) { + INIT_VNET_INET(curvnetb); int i; /* * Initialize hostcache structures */ - tcp_hostcache.cache_count = 0; - tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; - tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT; - tcp_hostcache.cache_limit = - tcp_hostcache.hashsize * tcp_hostcache.bucket_limit; - tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE; + V_tcp_hostcache.cache_count = 0; + V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE; + V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT; + V_tcp_hostcache.cache_limit = + V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit; + V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE; TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize", - &tcp_hostcache.hashsize); + &V_tcp_hostcache.hashsize); TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit", - &tcp_hostcache.cache_limit); + &V_tcp_hostcache.cache_limit); TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit", - &tcp_hostcache.bucket_limit); - if (!powerof2(tcp_hostcache.hashsize)) { + &V_tcp_hostcache.bucket_limit); + if (!powerof2(V_tcp_hostcache.hashsize)) { printf("WARNING: hostcache hash size is not a power of 2.\n"); - tcp_hostcache.hashsize = 512; /* safe default */ + V_tcp_hostcache.hashsize = 512; /* safe default */ } - tcp_hostcache.hashmask = tcp_hostcache.hashsize - 1; + V_tcp_hostcache.hashmask = V_tcp_hostcache.hashsize - 1; /* * Allocate the hash table */ - tcp_hostcache.hashbase = (struct hc_head *) - malloc(tcp_hostcache.hashsize * sizeof(struct hc_head), + V_tcp_hostcache.hashbase = (struct hc_head *) + malloc(V_tcp_hostcache.hashsize * sizeof(struct hc_head), M_HOSTCACHE, M_WAITOK | M_ZERO); /* * Initialize the hash buckets */ - for (i = 0; i < tcp_hostcache.hashsize; i++) { - TAILQ_INIT(&tcp_hostcache.hashbase[i].hch_bucket); - tcp_hostcache.hashbase[i].hch_length = 0; - mtx_init(&tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry", + for (i = 0; i < V_tcp_hostcache.hashsize; i++) { + TAILQ_INIT(&V_tcp_hostcache.hashbase[i].hch_bucket); + V_tcp_hostcache.hashbase[i].hch_length = 0; + mtx_init(&V_tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry", NULL, MTX_DEF); } /* * Allocate the hostcache entries. + * + * XXX don't need a separate zone for each hc instance - revisit!!! */ - tcp_hostcache.zone = uma_zcreate("hostcache", sizeof(struct hc_metrics), - NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - uma_zone_set_max(tcp_hostcache.zone, tcp_hostcache.cache_limit); + V_tcp_hostcache.zone = + uma_zcreate("hostcache", sizeof(struct hc_metrics), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + uma_zone_set_max(V_tcp_hostcache.zone, V_tcp_hostcache.cache_limit); /* * Set up periodic cache cleanup. */ callout_init(&tcp_hc_callout, CALLOUT_MPSAFE); - callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0); + callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, + tcp_hc_purge, 0); } /* @@ -266,6 +230,7 @@ static struct hc_metrics * tcp_hc_lookup(struct in_conninfo *inc) { + INIT_VNET_INET(curvnetb); int hash; struct hc_head *hc_head; struct hc_metrics *hc_entry; @@ -280,7 +245,7 @@ else hash = HOSTCACHE_HASH(&inc->inc_faddr); - hc_head = &tcp_hostcache.hashbase[hash]; + hc_head = &V_tcp_hostcache.hashbase[hash]; /* * aquire lock for this bucket row @@ -336,7 +301,7 @@ else hash = HOSTCACHE_HASH(&inc->inc_faddr); - hc_head = &tcp_hostcache.hashbase[hash]; + hc_head = &V_tcp_hostcache.hashbase[hash]; /* * aquire lock for this bucket row @@ -348,8 +313,8 @@ /* * If the bucket limit is reached reuse the least used element */ - if (hc_head->hch_length >= tcp_hostcache.bucket_limit || - tcp_hostcache.cache_count >= tcp_hostcache.cache_limit) { + if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit || + V_tcp_hostcache.cache_count >= V_tcp_hostcache.cache_limit) { hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead); /* * At first we were dropping the last element, just to @@ -359,17 +324,17 @@ * be "lossy". */ TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q); - tcp_hostcache.hashbase[hash].hch_length--; - tcp_hostcache.cache_count--; + V_tcp_hostcache.hashbase[hash].hch_length--; + V_tcp_hostcache.cache_count--; V_tcpstat.tcps_hc_bucketoverflow++; #if 0 - uma_zfree(tcp_hostcache.zone, hc_entry); + uma_zfree(V_tcp_hostcache.zone, hc_entry); #endif } else { /* * Allocate a new entry, or balk if not possible */ - hc_entry = uma_zalloc(tcp_hostcache.zone, M_NOWAIT); + hc_entry = uma_zalloc(V_tcp_hostcache.zone, M_NOWAIT); if (hc_entry == NULL) { THC_UNLOCK(&hc_head->hch_mtx); return NULL; @@ -385,14 +350,14 @@ else hc_entry->ip4 = inc->inc_faddr; hc_entry->rmx_head = hc_head; - hc_entry->rmx_expire = tcp_hostcache.expire; + hc_entry->rmx_expire = V_tcp_hostcache.expire; /* * Put it upfront */ TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q); - tcp_hostcache.hashbase[hash].hch_length++; - tcp_hostcache.cache_count++; + V_tcp_hostcache.hashbase[hash].hch_length++; + V_tcp_hostcache.cache_count++; V_tcpstat.tcps_hc_added++; return hc_entry; @@ -406,6 +371,7 @@ void tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite) { + INIT_VNET_INET(curvnetb); struct hc_metrics *hc_entry; /* @@ -421,7 +387,7 @@ return; } hc_entry->rmx_hits++; - hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */ + hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ hc_metrics_lite->rmx_mtu = hc_entry->rmx_mtu; hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh; @@ -446,6 +412,7 @@ u_long tcp_hc_getmtu(struct in_conninfo *inc) { + INIT_VNET_INET(curvnetb); struct hc_metrics *hc_entry; u_long mtu; @@ -454,7 +421,7 @@ return 0; } hc_entry->rmx_hits++; - hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */ + hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ mtu = hc_entry->rmx_mtu; THC_UNLOCK(&hc_entry->rmx_head->hch_mtx); @@ -468,6 +435,7 @@ void tcp_hc_updatemtu(struct in_conninfo *inc, u_long mtu) { + INIT_VNET_INET(curvnetb); struct hc_metrics *hc_entry; /* @@ -484,7 +452,7 @@ return; } hc_entry->rmx_updates++; - hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */ + hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ hc_entry->rmx_mtu = mtu; @@ -517,7 +485,7 @@ return; } hc_entry->rmx_updates++; - hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */ + hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */ if (hcml->rmx_rtt != 0) { if (hc_entry->rmx_rtt == 0) @@ -588,13 +556,14 @@ static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) { + INIT_VNET_INET(curvnetb); int bufsize; int linesize = 128; char *p, *buf; int len, i, error; struct hc_metrics *hc_entry; - bufsize = linesize * (tcp_hostcache.cache_count + 1); + bufsize = linesize * (V_tcp_hostcache.cache_count + 1); p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO); @@ -604,9 +573,9 @@ p += len; #define msec(u) (((u) + 500) / 1000) - for (i = 0; i < tcp_hostcache.hashsize; i++) { - THC_LOCK(&tcp_hostcache.hashbase[i].hch_mtx); - TAILQ_FOREACH(hc_entry, &tcp_hostcache.hashbase[i].hch_bucket, + for (i = 0; i < V_tcp_hostcache.hashsize; i++) { + THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); + TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket, rmx_q) { len = snprintf(p, linesize, "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu " @@ -632,7 +601,7 @@ hc_entry->rmx_expire); p += len; } - THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx); + THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); } #undef msec error = SYSCTL_OUT(req, buf, p - buf); @@ -648,28 +617,32 @@ tcp_hc_purge(void *arg) { struct hc_metrics *hc_entry, *hc_next; - int all = (intptr_t)arg; + int all = 0; int i; - if (tcp_hostcache.purgeall) { + VNETB_ITERLOOP_BEGIN() + INIT_VNET_INET(curvnetb); + if (V_tcp_hostcache.purgeall) { all = 1; - tcp_hostcache.purgeall = 0; + V_tcp_hostcache.purgeall = 0; } - for (i = 0; i < tcp_hostcache.hashsize; i++) { - THC_LOCK(&tcp_hostcache.hashbase[i].hch_mtx); - TAILQ_FOREACH_SAFE(hc_entry, &tcp_hostcache.hashbase[i].hch_bucket, - rmx_q, hc_next) { + for (i = 0; i < V_tcp_hostcache.hashsize; i++) { + THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); + TAILQ_FOREACH_SAFE(hc_entry, + &V_tcp_hostcache.hashbase[i].hch_bucket, + rmx_q, hc_next) { if (all || hc_entry->rmx_expire <= 0) { - TAILQ_REMOVE(&tcp_hostcache.hashbase[i].hch_bucket, + TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket, hc_entry, rmx_q); - uma_zfree(tcp_hostcache.zone, hc_entry); - tcp_hostcache.hashbase[i].hch_length--; - tcp_hostcache.cache_count--; + uma_zfree(V_tcp_hostcache.zone, hc_entry); + V_tcp_hostcache.hashbase[i].hch_length--; + V_tcp_hostcache.cache_count--; } else hc_entry->rmx_expire -= TCP_HOSTCACHE_PRUNE; } - THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx); + THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); } + VNETB_ITERLOOP_END(); callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0); } ==== //depot/projects/vimage/src/sys/netinet/tcp_subr.c#5 (text+ko) ==== @@ -387,6 +387,7 @@ #undef TCP_MINPROTOHDR tcp_timer_init(); + tcp_hc_init(); #ifdef VIMAGE if (curvnetb != &vnetb_0) @@ -394,7 +395,6 @@ #endif syncache_init(); - tcp_hc_init(); tcp_reass_init(); ISN_LOCK_INIT(); callout_init(&isn_callout, CALLOUT_MPSAFE); ==== //depot/projects/vimage/src/sys/netinet/vinet.h#3 (text+ko) ==== @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -66,6 +67,7 @@ struct inpcbinfo _tcbinfo; struct tcpstat _tcpstat; /* tcp statistics */ TAILQ_HEAD(, tcptw) _twq_2msl; + struct tcp_hostcache _tcp_hostcache; struct inpcbhead _udb; struct inpcbinfo _udbinfo; @@ -110,6 +112,7 @@ #define V_tcbinfo VNET_INET(tcbinfo) #define V_tcpstat VNET_INET(tcpstat) #define V_twq_2msl VNET_INET(twq_2msl) +#define V_tcp_hostcache VNET_INET(tcp_hostcache) #define V_udb VNET_INET(udb) #define V_udbinfo VNET_INET(udbinfo)