From owner-p4-projects@FreeBSD.ORG Sat Aug 6 08:45:37 2011 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id BF8F11065670; Sat, 6 Aug 2011 08:45:36 +0000 (UTC) Delivered-To: perforce@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 60F62106564A for ; Sat, 6 Aug 2011 08:45:36 +0000 (UTC) (envelope-from goda@FreeBSD.org) Received: from skunkworks.freebsd.org (skunkworks.freebsd.org [IPv6:2001:4f8:fff6::2d]) by mx1.freebsd.org (Postfix) with ESMTP id 4DF668FC12 for ; Sat, 6 Aug 2011 08:45:36 +0000 (UTC) Received: from skunkworks.freebsd.org (localhost [127.0.0.1]) by skunkworks.freebsd.org (8.14.4/8.14.4) with ESMTP id p768jag2074828 for ; Sat, 6 Aug 2011 08:45:36 GMT (envelope-from goda@FreeBSD.org) Received: (from perforce@localhost) by skunkworks.freebsd.org (8.14.4/8.14.4/Submit) id p768jaGO074824 for perforce@freebsd.org; Sat, 6 Aug 2011 08:45:36 GMT (envelope-from goda@FreeBSD.org) Date: Sat, 6 Aug 2011 08:45:36 GMT Message-Id: <201108060845.p768jaGO074824@skunkworks.freebsd.org> X-Authentication-Warning: skunkworks.freebsd.org: perforce set sender to goda@FreeBSD.org using -f From: Kazuya Goda To: Perforce Change Reviews Precedence: bulk Cc: Subject: PERFORCE change 197267 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 06 Aug 2011 08:45:37 -0000 http://p4web.freebsd.org/@@197267?ac=10 Change 197267 by goda@kaffierlime on 2011/08/06 08:44:31 implement SOFT RSS Affected files ... .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/conf/options#4 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/dev/e1000/if_em.h#2 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/kern/uipc_socket.c#3 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/net/netisr.c#4 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_pcb.h#3 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_pcbgroup.c#3 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_rss.c#3 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_rss.h#2 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/ip_input.c#3 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/tcp_input.c#3 edit .. //depot/projects/soc2011/kgoda_rpsrfs/src/sys/sys/sockbuf.h#2 edit Differences ... ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/conf/options#4 (text+ko) ==== @@ -425,6 +425,7 @@ ROUTETABLES opt_route.h RSS opt_rss.h RPS opt_rps.h +SOFT_RSS opt_soft_rss.h SLIP_IFF_OPTS opt_slip.h TCPDEBUG TCP_OFFLOAD_DISABLE opt_inet.h #Disable code to dispatch tcp offloading ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/dev/e1000/if_em.h#2 (text+ko) ==== ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/kern/uipc_socket.c#3 (text+ko) ==== @@ -107,6 +107,7 @@ #include "opt_inet6.h" #include "opt_zero.h" #include "opt_compat.h" +#include "opt_soft_rss.h" #include #include @@ -142,6 +143,10 @@ #include +#ifdef SOFT_RSS +#include +#endif + #ifdef COMPAT_FREEBSD32 #include #include @@ -2302,6 +2307,9 @@ { int error; +#ifdef SOFT_RSS + soft_rss_record_curcpu(&so->so_rcv); +#endif CURVNET_SET(so->so_vnet); error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, controlp, flagsp)); ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/net/netisr.c#4 (text+ko) ==== @@ -65,6 +65,8 @@ #include "opt_ddb.h" #include "opt_device_polling.h" +#include "opt_rps.h" +#include "opt_soft_rss.h" #include #include @@ -94,6 +96,10 @@ #include #include +#ifdef SOFT_RSS +#include +#endif + /* * Locking strategy: three types of locks protect netisr processing: * @@ -860,6 +866,10 @@ VNET_ASSERT(m->m_pkthdr.rcvif != NULL, ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m)); CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); +#ifdef SOFT_RSS + if (m->m_pkthdr.flowid) + soft_rss_dec_flow_qlen(m->m_pkthdr.flowid); +#endif netisr_proto[proto].np_handler(m); CURVNET_RESTORE(); } @@ -1109,6 +1119,10 @@ */ nwsp->nws_flags |= NWS_DISPATCHING; NWS_UNLOCK(nwsp); +#ifdef SOFT_RSS + if (m->m_pkthdr.flowid) + soft_rss_dec_flow_qlen(m->m_pkthdr.flowid); +#endif netisr_proto[proto].np_handler(m); NWS_LOCK(nwsp); nwsp->nws_flags &= ~NWS_DISPATCHING; @@ -1302,6 +1316,12 @@ } #endif +#ifdef RPS + netisr_defaultthreads = mp_ncpus; + netisr_maxthreads = mp_ncpus; + netisr_bindthreads = 1; +#endif + if (TUNABLE_STR_FETCH("net.isr.dispatch", tmp, sizeof(tmp))) { error = netisr_dispatch_policy_from_str(tmp, &dispatch_policy); ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_pcb.h#3 (text+ko) ==== @@ -45,6 +45,7 @@ #include #ifdef _KERNEL +#include #include #include #include ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_pcbgroup.c#3 (text+ko) ==== ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_rss.c#3 (text+ko) ==== @@ -33,6 +33,7 @@ #include "opt_inet6.h" #include "opt_pcbgroup.h" +#include "opt_soft_rss.h" #ifndef PCBGROUP #error "options RSS depends on options PCBGROUP" @@ -171,6 +172,16 @@ }; static struct rss_table_entry rss_table[RSS_TABLE_MAXLEN]; +#ifdef SOFT_RSS +struct netisr_flow{ + uint16_t cpu; + unsigned qlen; +}; + +static struct netisr_flow *netisr_flow_table; +static unsigned *socket_flow_table; +#endif + static void rss_init(__unused void *arg) { @@ -502,3 +513,137 @@ SYSCTL_PROC(_net_inet_rss, OID_AUTO, key, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rss_key, "", "RSS keying material"); + +#ifdef SOFT_RSS +MALLOC_DEFINE(M_SOFTRSS_FLOWS, "softrss", "soft rss flow entrys"); + +SYSCTL_NODE(_net_inet, OID_AUTO, softrss, CTLFLAG_RW, 0, + "Software emulate receive-side steering"); + +/* + * + */ +static int softrss_maxflows = 128; +TUNABLE_INT("net.inet.softrss.maxflows", &softrss_maxflows); +SYSCTL_INT(_net_inet_softrss, OID_AUTO, maxflows, CTLFLAG_RDTUN, + &softrss_maxflows, 0, "Flow entrys using soft RSS."); + + +static void +socket_flow_table_init(void) +{ + int i; + + socket_flow_table = (unsigned *)malloc(sizeof(unsigned) * softrss_maxflows, + M_SOFTRSS_FLOWS, M_NOWAIT); + + if (socket_flow_table == NULL) + panic("not allocate memory for soft rss"); + + for (i = 0; i < softrss_maxflows; i++) + socket_flow_table[i] = NO_CURR_CPU; +} +SYSINIT(scoket_flow_table_init, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, + socket_flow_table_init, NULL); + +static void +netisr_flow_table_init(void) +{ + int i; + + netisr_flow_table = (struct netisr_flow *) + malloc(sizeof(struct netisr_flow) * softrss_maxflows, + M_SOFTRSS_FLOWS, M_NOWAIT); + + if (netisr_flow_table == NULL) + panic("not allocate memory for soft rss"); + + for (i = 0; i < softrss_maxflows; i++) { + netisr_flow_table[i].cpu = 0; + netisr_flow_table[i].qlen = 0; + } +} +SYSINIT(netisr_flow_table_init, SI_SUB_CLOCKS, SI_ORDER_MIDDLE, + netisr_flow_table_init, NULL); + +void +soft_rss_record_curcpu(struct sockbuf *sb) +{ + if (sb->flowid) + atomic_store_rel_int(&socket_flow_table[sb->flowid % softrss_maxflows], + curcpu); +} + +static inline int +soft_rss_get_curcpu(int index) +{ + return(atomic_load_acq_int(&socket_flow_table[index])); +} + +static inline void +soft_rss_record_dstcpu(int index, uint16_t cpu) +{ + atomic_store_rel_16(&netisr_flow_table[index].cpu, cpu); +} + +static inline uint16_t +soft_rss_get_dstcpu(int index) +{ + return(atomic_load_acq_16(&netisr_flow_table[index].cpu)); +} + + +static inline void +soft_rss_inc_flow_qlen(int index) +{ + atomic_add_acq_int(&netisr_flow_table[index].qlen, 1); +} + +void +soft_rss_dec_flow_qlen(unsigned flowid) +{ + atomic_subtract_acq_int(&netisr_flow_table[flowid % softrss_maxflows].qlen, 1); +} + +static inline int +soft_rss_get_flow_qlen(int index) +{ + return(atomic_load_acq_int(&netisr_flow_table[index].qlen)); +} + +static u_int +soft_rss_getcpu(u_int flowid) +{ + int index; + u_int cur, dst, qlen; + + index = flowid % softrss_maxflows; + cur = soft_rss_get_curcpu(index); + dst = soft_rss_get_dstcpu(index); + qlen = soft_rss_get_flow_qlen(index); + + if (cur == NO_CURR_CPU){ + cur = netisr_default_flow2cpu(flowid); + soft_rss_record_dstcpu(index, (uint16_t)cur); + } + else if (cur != dst){ + if (qlen == 0) + soft_rss_record_dstcpu(index, (uint16_t)cur); + else + cur = dst; + } + soft_rss_inc_flow_qlen(index); + return ((u_int)cur); +} + +/* + * netisr CPU affinity lookup routine for use by flowid. + */ +struct mbuf * +soft_rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) +{ + *cpuid = soft_rss_getcpu(m->m_pkthdr.flowid); + return (m); +} + +#endif /* SOFT_RSS */ ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/in_rss.h#2 (text+ko) ==== @@ -31,6 +31,9 @@ #define _NETINET_IN_RSS_H_ #include /* in_addr_t */ +#ifdef SOFT_RSS +#include /* struct sockbuf */ +#endif /* * Supported RSS hash functions. @@ -89,4 +92,16 @@ */ struct mbuf *rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid); +#ifdef SOFT_RSS +#define NO_CURR_CPU 0xffffffff + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_SOFTRSS_FLOWS); +#endif + +void soft_rss_record_curcpu(struct sockbuf *sb); +void soft_rss_dec_flow_qlen(u_int flowid); +struct mbuf *soft_rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid); +#endif /* SOFT_RSS */ + #endif /* !_NETINET_IN_RSS_H_ */ ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/ip_input.c#3 (text+ko) ==== @@ -38,6 +38,7 @@ #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rps.h" +#include "opt_soft_rss.h" #include #include @@ -77,6 +78,9 @@ #ifdef IPSEC #include #endif /* IPSEC */ +#ifdef SOFT_RSS +#include +#endif #include @@ -145,8 +149,14 @@ .nh_handler = ip_input, .nh_proto = NETISR_IP, .nh_policy = NETISR_POLICY_FLOW, -#ifdef RPS +#if defined SOFT_RSS + .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, + .nh_m2cpuid = soft_rss_m2cpuid, +#elif defined RPS + .nh_dispatch = NETISR_DISPATCH_DEFERRED, +#else + .nh_dispatch = NETISR_DISPATCH_DIRECT, #endif }; ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/netinet/tcp_input.c#3 (text+ko) ==== @@ -55,6 +55,7 @@ #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_tcpdebug.h" +#include "opt_soft_rss.h" #include #include @@ -857,6 +858,9 @@ rstreason = BANDLIM_RST_CLOSEDPORT; goto dropwithreset; } +#ifdef SOFT_RSS + inp->inp_socket->so_rcv.flowid = m->m_pkthdr.flowid; +#endif INP_WLOCK_ASSERT(inp); if (!(inp->inp_flags & INP_HW_FLOWID) && (m->m_flags & M_FLOWID) ==== //depot/projects/soc2011/kgoda_rpsrfs/src/sys/sys/sockbuf.h#2 (text+ko) ==== @@ -32,6 +32,11 @@ */ #ifndef _SYS_SOCKBUF_H_ #define _SYS_SOCKBUF_H_ + +#ifdef HAVE_KERNEL_OPTION_HEADERS +#include "opt_soft_rss.h" +#endif + #include /* for struct selinfo */ #include #include @@ -101,6 +106,9 @@ short sb_flags; /* (c/d) flags, see below */ int (*sb_upcall)(struct socket *, void *, int); /* (c/d) */ void *sb_upcallarg; /* (c/d) */ +#ifdef SOFT_RSS + uint32_t flowid; +#endif }; #ifdef _KERNEL