Date: Fri, 10 May 2019 13:41:19 +0000 (UTC) From: Andrew Gallatin <gallatin@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r347430 - in head/sys: kern netinet sys Message-ID: <201905101341.x4ADfJvJ087843@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gallatin Date: Fri May 10 13:41:19 2019 New Revision: 347430 URL: https://svnweb.freebsd.org/changeset/base/347430 Log: Bind TCP HPTS (pacer) threads to NUMA domains Bind the TCP pacer threads to NUMA domains and build per-domain pacer-thread lookup tables. These tables allow us to use the inpcb's NUMA domain information to match an inpcb with a pacer thread on the same domain. The motivation for this is to keep the TCP connection local to a NUMA domain as much as possible. Thanks to jhb for pre-reviewing an earlier version of the patch. Reviewed by: rrs Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D20134 Modified: head/sys/kern/kern_intr.c head/sys/netinet/tcp_hpts.c head/sys/sys/interrupt.h Modified: head/sys/kern/kern_intr.c ============================================================================== --- head/sys/kern/kern_intr.c Fri May 10 13:18:22 2019 (r347429) +++ head/sys/kern/kern_intr.c Fri May 10 13:41:19 2019 (r347430) @@ -380,6 +380,25 @@ intr_event_bind_ithread(struct intr_event *ie, int cpu return (_intr_event_bind(ie, cpu, false, true)); } +/* + * Bind an interrupt event's ithread to the specified cpuset. 
+ */ +int +intr_event_bind_ithread_cpuset(struct intr_event *ie, cpuset_t *cs) +{ + lwpid_t id; + + mtx_lock(&ie->ie_lock); + if (ie->ie_thread != NULL) { + id = ie->ie_thread->it_thread->td_tid; + mtx_unlock(&ie->ie_lock); + return (cpuset_setthread(id, cs)); + } else { + mtx_unlock(&ie->ie_lock); + } + return (ENODEV); +} + static struct intr_event * intr_lookup(int irq) { Modified: head/sys/netinet/tcp_hpts.c ============================================================================== --- head/sys/netinet/tcp_hpts.c Fri May 10 13:18:22 2019 (r347429) +++ head/sys/netinet/tcp_hpts.c Fri May 10 13:41:19 2019 (r347430) @@ -131,6 +131,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kern_prefetch.h> #include <vm/uma.h> +#include <vm/vm.h> #include <net/route.h> #include <net/vnet.h> @@ -171,7 +172,7 @@ MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts"); #include <net/rss_config.h> static int tcp_bind_threads = 1; #else -static int tcp_bind_threads = 0; +static int tcp_bind_threads = 2; #endif TUNABLE_INT("net.inet.tcp.bind_hptss", &tcp_bind_threads); @@ -207,6 +208,13 @@ static int32_t logging_on = 0; static int32_t hpts_sleep_max = (NUM_OF_HPTSI_SLOTS - 2); static int32_t tcp_hpts_precision = 120; +struct hpts_domain_info { + int count; + int cpu[MAXCPU]; +}; + +struct hpts_domain_info hpts_domains[MAXMEMDOM]; + SYSCTL_INT(_net_inet_tcp_hpts, OID_AUTO, precision, CTLFLAG_RW, &tcp_hpts_precision, 120, "Value for PRE() precision of callout"); @@ -1079,8 +1087,10 @@ hpts_random_cpu(struct inpcb *inp){ static uint16_t hpts_cpuid(struct inpcb *inp){ u_int cpuid; +#ifdef NUMA + struct hpts_domain_info *di; +#endif - /* * If one has been set use it i.e. we want both in and out on the * same hpts. @@ -1103,11 +1113,21 @@ hpts_cpuid(struct inpcb *inp){ * unknown cpuids to curcpu. Not the best, but apparently better * than defaulting to swi 0. 
*/ - if (inp->inp_flowtype != M_HASHTYPE_NONE) { + + if (inp->inp_flowtype == M_HASHTYPE_NONE) + return (hpts_random_cpu(inp)); + /* + * Hash to a thread based on the flowid. If we are using numa, + * then restrict the hash to the numa domain where the inp lives. + */ +#ifdef NUMA + if (tcp_bind_threads == 2 && inp->inp_numa_domain != M_NODOM) { + di = &hpts_domains[inp->inp_numa_domain]; + cpuid = di->cpu[inp->inp_flowid % di->count]; + } else +#endif cpuid = inp->inp_flowid % mp_ncpus; - return (cpuid); - } - cpuid = hpts_random_cpu(inp); + return (cpuid); #endif } @@ -1781,8 +1801,11 @@ tcp_init_hptsi(void *st) struct timeval tv; sbintime_t sb; struct tcp_hpts_entry *hpts; + struct pcpu *pc; + cpuset_t cs; char unit[16]; uint32_t ncpus = mp_ncpus ? mp_ncpus : MAXCPU; + int count, domain; tcp_pace.rp_proc = NULL; tcp_pace.rp_num_hptss = ncpus; @@ -1861,6 +1884,11 @@ tcp_init_hptsi(void *st) } callout_init(&hpts->co, 1); } + + /* Don't try to bind to NUMA domains if we don't have any */ + if (vm_ndomains == 1 && tcp_bind_threads == 2) + tcp_bind_threads = 0; + /* * Now lets start ithreads to handle the hptss. */ @@ -1875,9 +1903,20 @@ tcp_init_hptsi(void *st) hpts, i, error); } created++; - if (tcp_bind_threads) { + if (tcp_bind_threads == 1) { if (intr_event_bind(hpts->ie, i) == 0) bound++; + } else if (tcp_bind_threads == 2) { + pc = pcpu_find(i); + domain = pc->pc_domain; + CPU_COPY(&cpuset_domain[domain], &cs); + if (intr_event_bind_ithread_cpuset(hpts->ie, &cs) + == 0) { + bound++; + count = hpts_domains[domain].count; + hpts_domains[domain].cpu[count] = i; + hpts_domains[domain].count++; + } } tv.tv_sec = 0; tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC; @@ -1893,9 +1932,20 @@ tcp_init_hptsi(void *st) C_PREL(tcp_hpts_precision)); } } - printf("TCP Hpts created %d swi interrupt thread and bound %d\n", - created, bound); - return; + /* + * If we somehow have an empty domain, fall back to choosing + * among all hpts threads.
+ */ + for (i = 0; i < vm_ndomains; i++) { + if (hpts_domains[i].count == 0) { + tcp_bind_threads = 0; + break; + } + } + + printf("TCP Hpts created %d swi interrupt threads and bound %d to %s\n", + created, bound, + tcp_bind_threads == 2 ? "NUMA domains" : "cpus"); } SYSINIT(tcphptsi, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, tcp_init_hptsi, NULL); Modified: head/sys/sys/interrupt.h ============================================================================== --- head/sys/sys/interrupt.h Fri May 10 13:18:22 2019 (r347429) +++ head/sys/sys/interrupt.h Fri May 10 13:41:19 2019 (r347430) @@ -176,6 +176,8 @@ int intr_event_add_handler(struct intr_event *ie, cons int intr_event_bind(struct intr_event *ie, int cpu); int intr_event_bind_irqonly(struct intr_event *ie, int cpu); int intr_event_bind_ithread(struct intr_event *ie, int cpu); +int intr_event_bind_ithread_cpuset(struct intr_event *ie, + cpuset_t *mask); int intr_event_create(struct intr_event **event, void *source, int flags, int irq, void (*pre_ithread)(void *), void (*post_ithread)(void *), void (*post_filter)(void *),
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201905101341.x4ADfJvJ087843>