From owner-p4-projects@FreeBSD.ORG Thu Dec 7 20:20:26 2006 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 6456916A4CE; Thu, 7 Dec 2006 20:20:26 +0000 (UTC) X-Original-To: perforce@FreeBSD.org Delivered-To: perforce@FreeBSD.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 1602E16A492 for ; Thu, 7 Dec 2006 20:20:26 +0000 (UTC) (envelope-from zec@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [69.147.83.41]) by mx1.FreeBSD.org (Postfix) with ESMTP id EC8C143CCE for ; Thu, 7 Dec 2006 20:18:56 +0000 (GMT) (envelope-from zec@FreeBSD.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.6/8.13.6) with ESMTP id kB7KJoWQ049558 for ; Thu, 7 Dec 2006 20:19:50 GMT (envelope-from zec@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.6/8.13.4/Submit) id kB7KJnQK049555 for perforce@freebsd.org; Thu, 7 Dec 2006 20:19:49 GMT (envelope-from zec@FreeBSD.org) Date: Thu, 7 Dec 2006 20:19:49 GMT Message-Id: <200612072019.kB7KJnQK049555@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to zec@FreeBSD.org using -f From: Marko Zec To: Perforce Change Reviews Cc: Subject: PERFORCE change 111257 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 07 Dec 2006 20:20:26 -0000 http://perforce.freebsd.org/chv.cgi?CH=111257 Change 111257 by zec@zec_tca51 on 2006/12/07 20:19:28 Virtualize tcp_syncache. Affected files ... .. //depot/projects/vimage/src/sys/netinet/tcp_subr.c#6 edit .. //depot/projects/vimage/src/sys/netinet/tcp_syncache.c#4 edit .. //depot/projects/vimage/src/sys/netinet/tcp_syncache.h#1 add .. //depot/projects/vimage/src/sys/netinet/vinet.h#4 edit Differences ... ==== //depot/projects/vimage/src/sys/netinet/tcp_subr.c#6 (text+ko) ==== @@ -387,6 +387,7 @@ #undef TCP_MINPROTOHDR tcp_timer_init(); + syncache_init(); tcp_hc_init(); #ifdef VIMAGE @@ -394,7 +395,6 @@ return; #endif - syncache_init(); tcp_reass_init(); ISN_LOCK_INIT(); callout_init(&isn_callout, CALLOUT_MPSAFE); ==== //depot/projects/vimage/src/sys/netinet/tcp_syncache.c#4 (text+ko) ==== @@ -79,6 +79,7 @@ #include #include #include +#include #ifdef INET6 #include #endif @@ -112,51 +113,6 @@ &tcp_syncookiesonly, 0, "Use only TCP SYN cookies"); -#define SYNCOOKIE_SECRET_SIZE 8 /* dwords */ -#define SYNCOOKIE_LIFETIME 16 /* seconds */ - -struct syncache { - TAILQ_ENTRY(syncache) sc_hash; - struct in_conninfo sc_inc; /* addresses */ - u_long sc_rxttime; /* retransmit time */ - u_int16_t sc_rxmits; /* retransmit counter */ - - u_int32_t sc_tsreflect; /* timestamp to reflect */ - u_int32_t sc_ts; /* our timestamp to send */ - u_int32_t sc_tsoff; /* ts offset w/ syncookies */ - u_int32_t sc_flowlabel; /* IPv6 flowlabel */ - tcp_seq sc_irs; /* seq from peer */ - tcp_seq sc_iss; /* our ISS */ - struct mbuf *sc_ipopts; /* source route */ - - u_int16_t sc_peer_mss; /* peer's MSS */ - u_int16_t sc_wnd; /* advertised window */ - u_int8_t sc_ip_ttl; /* IPv4 TTL */ - u_int8_t sc_ip_tos; /* IPv4 TOS */ - u_int8_t sc_requested_s_scale:4, - sc_requested_r_scale:4; - u_int8_t sc_flags; -#define SCF_NOOPT 0x01 /* no TCP options */ -#define SCF_WINSCALE 0x02 /* negotiated window scaling */ -#define SCF_TIMESTAMP 0x04 /* negotiated timestamps */ - /* MSS is implicit */ -#define SCF_UNREACH 0x10 /* icmp unreachable received */ -#define SCF_SIGNATURE 0x20 /* send MD5 digests */ -#define SCF_SACK 0x80 /* send SACK option */ -}; - -struct syncache_head { - struct mtx sch_mtx; - TAILQ_HEAD(sch_head, syncache) sch_bucket; - struct callout sch_timer; - int sch_nextc; - u_int sch_length; - u_int sch_oddeven; - u_int32_t sch_secbits_odd[SYNCOOKIE_SECRET_SIZE]; - u_int32_t sch_secbits_even[SYNCOOKIE_SECRET_SIZE]; - u_int sch_reseed; /* time_uptime, seconds */ -}; - static void syncache_drop(struct syncache *, struct syncache_head *); static void syncache_free(struct syncache *); static void syncache_insert(struct syncache *, struct syncache_head *); @@ -183,46 +139,42 @@ #define TCP_SYNCACHE_HASHSIZE 512 #define TCP_SYNCACHE_BUCKETLIMIT 30 -struct tcp_syncache { - struct syncache_head *hashbase; - uma_zone_t zone; - u_int hashsize; - u_int hashmask; - u_int bucket_limit; - u_int cache_count; /* XXX: unprotected */ - u_int cache_limit; - u_int rexmt_limit; - u_int hash_secret; -}; +#ifndef VIMAGE static struct tcp_syncache tcp_syncache; +#endif SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0, "TCP SYN cache"); -SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN, - &tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO, + bucketlimit, CTLFLAG_RDTUN, + tcp_syncache.bucket_limit, 0, "Per-bucket hash limit for syncache"); -SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_RDTUN, - &tcp_syncache.cache_limit, 0, "Overall entry limit for syncache"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO, + cachelimit, CTLFLAG_RDTUN, + tcp_syncache.cache_limit, 0, "Overall entry limit for syncache"); -SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_RD, - &tcp_syncache.cache_count, 0, "Current number of entries in syncache"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO, + count, CTLFLAG_RD, + tcp_syncache.cache_count, 0, "Current number of entries in syncache"); -SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_RDTUN, - &tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO, + hashsize, CTLFLAG_RDTUN, + tcp_syncache.hashsize, 0, "Size of TCP syncache hashtable"); -SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_RW, - &tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions"); +SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_syncache, OID_AUTO, + rexmtlimit, CTLFLAG_RW, + tcp_syncache.rexmt_limit, 0, "Limit on SYN/ACK retransmissions"); static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache"); #define SYNCACHE_HASH(inc, mask) \ - ((tcp_syncache.hash_secret ^ \ + ((V_tcp_syncache.hash_secret ^ \ (inc)->inc_faddr.s_addr ^ \ ((inc)->inc_faddr.s_addr >> 16) ^ \ (inc)->inc_fport ^ (inc)->inc_lport) & mask) #define SYNCACHE_HASH6(inc, mask) \ - ((tcp_syncache.hash_secret ^ \ + ((V_tcp_syncache.hash_secret ^ \ (inc)->inc6_faddr.s6_addr32[0] ^ \ (inc)->inc6_faddr.s6_addr32[3] ^ \ (inc)->inc_fport ^ (inc)->inc_lport) & mask) @@ -258,58 +210,66 @@ static void syncache_free(struct syncache *sc) { + INIT_VNET_INET(curvnetb); + if (sc->sc_ipopts) (void) m_free(sc->sc_ipopts); - uma_zfree(tcp_syncache.zone, sc); + uma_zfree(V_tcp_syncache.zone, sc); } void syncache_init(void) { + INIT_VNET_INET(curvnetb); int i; - tcp_syncache.cache_count = 0; - tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; - tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; - tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS; - tcp_syncache.hash_secret = arc4random(); + V_tcp_syncache.cache_count = 0; + V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; + V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT; + V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS; + V_tcp_syncache.hash_secret = arc4random(); TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize", - &tcp_syncache.hashsize); + &V_tcp_syncache.hashsize); TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit", - &tcp_syncache.bucket_limit); - if (!powerof2(tcp_syncache.hashsize) || tcp_syncache.hashsize == 0) { + &V_tcp_syncache.bucket_limit); + if (!powerof2(V_tcp_syncache.hashsize) || + V_tcp_syncache.hashsize == 0) { printf("WARNING: syncache hash size is not a power of 2.\n"); - tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; + V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE; } - tcp_syncache.hashmask = tcp_syncache.hashsize - 1; + V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1; /* Set limits. */ - tcp_syncache.cache_limit = - tcp_syncache.hashsize * tcp_syncache.bucket_limit; + V_tcp_syncache.cache_limit = + V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit; TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit", - &tcp_syncache.cache_limit); + &V_tcp_syncache.cache_limit); /* Allocate the hash table. */ - MALLOC(tcp_syncache.hashbase, struct syncache_head *, - tcp_syncache.hashsize * sizeof(struct syncache_head), + MALLOC(V_tcp_syncache.hashbase, struct syncache_head *, + V_tcp_syncache.hashsize * sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO); /* Initialize the hash buckets. */ - for (i = 0; i < tcp_syncache.hashsize; i++) { - TAILQ_INIT(&tcp_syncache.hashbase[i].sch_bucket); - mtx_init(&tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head", + for (i = 0; i < V_tcp_syncache.hashsize; i++) { +#ifdef VIMAGE + V_tcp_syncache.hashbase[i].sch_vnetb = curvnetb; +#endif + TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket); + mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head", NULL, MTX_DEF); - callout_init_mtx(&tcp_syncache.hashbase[i].sch_timer, - &tcp_syncache.hashbase[i].sch_mtx, 0); - tcp_syncache.hashbase[i].sch_length = 0; + callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer, + &V_tcp_syncache.hashbase[i].sch_mtx, 0); + V_tcp_syncache.hashbase[i].sch_length = 0; } /* Create the syncache entry zone. */ - tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache), + /* XXX one zone for all vnets should do fine - revisit!!! */ + V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - uma_zone_set_max(tcp_syncache.zone, tcp_syncache.cache_limit); + uma_zone_set_max(V_tcp_syncache.zone, V_tcp_syncache.cache_limit); } /* @@ -319,7 +279,7 @@ static void syncache_insert(struct syncache *sc, struct syncache_head *sch) { - INIT_VNET_INET(curvnetb); + INIT_VNET_INET(sch->sch_vnetb); struct syncache *sc2; SCH_LOCK(sch); @@ -328,7 +288,7 @@ * Make sure that we don't overflow the per-bucket limit. * If the bucket is full, toss the oldest element. */ - if (sch->sch_length >= tcp_syncache.bucket_limit) { + if (sch->sch_length >= V_tcp_syncache.bucket_limit) { KASSERT(!TAILQ_EMPTY(&sch->sch_bucket), ("sch->sch_length incorrect")); sc2 = TAILQ_LAST(&sch->sch_bucket, sch_head); @@ -345,7 +305,7 @@ SCH_UNLOCK(sch); - tcp_syncache.cache_count++; + V_tcp_syncache.cache_count++; V_tcpstat.tcps_sc_added++; } @@ -356,6 +316,7 @@ static void syncache_drop(struct syncache *sc, struct syncache_head *sch) { + INIT_VNET_INET(sch->sch_vnetb); SCH_LOCK_ASSERT(sch); @@ -363,7 +324,7 @@ sch->sch_length--; syncache_free(sc); - tcp_syncache.cache_count--; + V_tcp_syncache.cache_count--; } /* @@ -374,10 +335,10 @@ static void syncache_timer(void *xsch) { - INIT_VNET_INET(curvnetb); /* XXX this can't work !!! */ struct syncache_head *sch = (struct syncache_head *)xsch; struct syncache *sc, *nsc; int tick = ticks; + INIT_VNET_INET(sch->sch_vnetb); /* NB: syncache_head has already been locked by the callout. */ SCH_LOCK_ASSERT(sch); @@ -397,7 +358,7 @@ continue; } - if (sc->sc_rxmits > tcp_syncache.rexmt_limit) { + if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) { syncache_drop(sc, sch); V_tcpstat.tcps_sc_stale++; continue; @@ -419,13 +380,14 @@ struct syncache * syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) { + INIT_VNET_INET(curvnetb); struct syncache *sc; struct syncache_head *sch; #ifdef INET6 if (inc->inc_isipv6) { - sch = &tcp_syncache.hashbase[ - SYNCACHE_HASH6(inc, tcp_syncache.hashmask)]; + sch = &V_tcp_syncache.hashbase[ + SYNCACHE_HASH6(inc, V_tcp_syncache.hashmask)]; *schp = sch; SCH_LOCK(sch); @@ -438,8 +400,8 @@ } else #endif { - sch = &tcp_syncache.hashbase[ - SYNCACHE_HASH(inc, tcp_syncache.hashmask)]; + sch = &V_tcp_syncache.hashbase[ + SYNCACHE_HASH(inc, V_tcp_syncache.hashmask)]; *schp = sch; SCH_LOCK(sch); @@ -795,7 +757,7 @@ /* Pull out the entry to unlock the bucket row. */ TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash); sch->sch_length--; - tcp_syncache.cache_count--; + V_tcp_syncache.cache_count--; SCH_UNLOCK(sch); } @@ -933,7 +895,7 @@ goto done; } - sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO); + sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); if (sc == NULL) { /* * The zone allocator couldn't provide more entries. @@ -943,7 +905,7 @@ V_tcpstat.tcps_sc_zonefail++; sc = TAILQ_LAST(&sch->sch_bucket, sch_head); syncache_drop(sc, sch); - sc = uma_zalloc(tcp_syncache.zone, M_NOWAIT | M_ZERO); + sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); if (sc == NULL) { if (tcp_syncookies) { bzero(&scs, sizeof(scs)); ==== //depot/projects/vimage/src/sys/netinet/vinet.h#4 (text+ko) ==== @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -68,6 +69,7 @@ struct tcpstat _tcpstat; /* tcp statistics */ TAILQ_HEAD(, tcptw) _twq_2msl; struct tcp_hostcache _tcp_hostcache; + struct tcp_syncache _tcp_syncache; struct inpcbhead _udb; struct inpcbinfo _udbinfo; @@ -113,6 +115,7 @@ #define V_tcpstat VNET_INET(tcpstat) #define V_twq_2msl VNET_INET(twq_2msl) #define V_tcp_hostcache VNET_INET(tcp_hostcache) +#define V_tcp_syncache VNET_INET(tcp_syncache) #define V_udb VNET_INET(udb) #define V_udbinfo VNET_INET(udbinfo)