Date: Mon, 9 May 2011 07:14:16 +0000 (UTC)
From: Andriy Gapon <avg@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-user@freebsd.org
Subject: svn commit: r221688 - in user/avg/xcpu/sys: amd64/amd64 kern
Message-ID: <201105090714.p497EGg0081173@svn.freebsd.org>
Author: avg
Date: Mon May  9 07:14:16 2011
New Revision: 221688
URL: http://svn.freebsd.org/changeset/base/221688

Log:
  re-implement smp rendezvous code

  - create one rendezvous (outgoing) mailbox per cpu, where a cpu places
    its rendezvous request directed at other cpus
  - create a cpu mask for each cpu where other cpus can set a bit to
    indicate that they have sent a rendezvous request to the cpu in question
  - send an ipi only for the first rv request; piggyback subsequent requests
    if a target cpu is still processing previous incoming requests
  - many-to-many rv requests can be sent now and there is no locking; the
    only limitation is that a cpu can have only a single outgoing request
    at a time
  - to avoid deadlocks, when a cpu waits for its request to be completed
    by target cpus, it also checks for and processes incoming requests
  - to avoid deadlock with the cpu stopping logic, cpus also check for stop
    requests while waiting
  - there can be only one cpu asking other cpus to stop; this is
    implemented via a hand-rolled spin mutex analogue; similar to the
    above, to avoid deadlocks a cpu spinning for this lock also checks
    for an incoming stop request
  - implement tlb shootdowns via the smp rendezvous mechanism, so no
    special ipis are needed now; amd64 only (see if the code can be
    further simplified)
  - thus the smp_ipi_mtx is not needed any longer

Modified:
  user/avg/xcpu/sys/amd64/amd64/mp_machdep.c
  user/avg/xcpu/sys/kern/kern_shutdown.c
  user/avg/xcpu/sys/kern/subr_smp.c
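For readers skimming the log, here is a rough userland C sketch of the
send-side idea described above; it is not the committed kernel code, and
the names (pending, senders, fake_ipi, post_request, drain_requests) are
invented for illustration.  Each target keeps a pending-request counter
and a bitmask of senders, and only the 0 -> 1 transition of the counter
pays for an IPI; later requests are simply picked up by the handler that
is already draining the mailbox:

#include <stdatomic.h>
#include <stdio.h>
#include <strings.h>

#define MAXCPU	32

static atomic_uint pending[MAXCPU];	/* outstanding requests per target */
static atomic_uint senders[MAXCPU];	/* bitmask of requesting cpus */

static void
fake_ipi(int cpu)
{

	printf("IPI sent to cpu %d\n", cpu);
}

/* Sender side: post a rendezvous request from 'self' to 'target'. */
static void
post_request(int self, int target)
{

	/* Only the first outstanding request costs an IPI. */
	if (atomic_fetch_add(&pending[target], 1) == 0)
		fake_ipi(target);
	atomic_fetch_or(&senders[target], 1u << self);
}

/* Target side: what the IPI handler would do - drain all posted requests. */
static void
drain_requests(int self)
{
	unsigned int mask;
	int cpu, count;

	while (atomic_load(&pending[self]) != 0) {
		mask = atomic_exchange(&senders[self], 0);
		count = 0;
		while (mask != 0) {
			cpu = ffs((int)mask) - 1;
			mask &= ~(1u << cpu);
			printf("cpu %d handles request from cpu %d\n",
			    self, cpu);
			count++;
		}
		atomic_fetch_sub(&pending[self], (unsigned int)count);
	}
}

int
main(void)
{

	post_request(1, 0);	/* first request, "IPI" is sent */
	post_request(2, 0);	/* coalesced with the one above */
	drain_requests(0);
	return (0);
}

The patch below plays the same game with the per-CPU smp_rv_count and
smp_rv_senders DPCPU variables.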
Modified: user/avg/xcpu/sys/amd64/amd64/mp_machdep.c
==============================================================================
--- user/avg/xcpu/sys/amd64/amd64/mp_machdep.c	Mon May  9 07:13:08 2011	(r221687)
+++ user/avg/xcpu/sys/amd64/amd64/mp_machdep.c	Mon May  9 07:14:16 2011	(r221688)
@@ -1087,67 +1087,66 @@ SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_
     &ipi_masked_range_size, 0, "");
 #endif /* COUNT_XINVLTLB_HITS */
 
-/*
- * Flush the TLB on all other CPU's
- */
+struct tlb_shootdown_params {
+	u_int type;
+	vm_offset_t addr1;
+	vm_offset_t addr2;
+};
+
 static void
-smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+tlb_shootdown_action(void *arg)
 {
-	u_int ncpu;
+	struct tlb_shootdown_params *params;
+	vm_offset_t addr;
 
-	ncpu = mp_ncpus - 1;	/* does not shootdown self */
-	if (ncpu < 1)
-		return;		/* no other cpus */
-	if (!(read_rflags() & PSL_I))
-		panic("%s: interrupts disabled", __func__);
-	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_addr1 = addr1;
-	smp_tlb_addr2 = addr2;
-	atomic_store_rel_int(&smp_tlb_wait, 0);
-	ipi_all_but_self(vector);
-	while (smp_tlb_wait < ncpu)
-		ia32_pause();
-	mtx_unlock_spin(&smp_ipi_mtx);
+	params = (struct tlb_shootdown_params *)arg;
+	switch (params->type) {
+	case IPI_INVLCACHE:
+		wbinvd();
+		break;
+	case IPI_INVLTLB:
+		invltlb();
+		break;
+	case IPI_INVLPG:
+		invlpg(params->addr1);
+		break;
+	case IPI_INVLRNG:
+		for (addr = params->addr1; addr < params->addr2;
+		    addr += PAGE_SIZE)
+			invlpg(addr);
+		break;
+	default:
+		panic("Unknown TLB shootdown type %u", params->type);
+	}
 }
 
 static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector,
+    vm_offset_t addr1, vm_offset_t addr2)
 {
-	int ncpu, othercpus;
+	struct tlb_shootdown_params params;
 
-	othercpus = mp_ncpus - 1;
-	if (mask == (cpumask_t)-1) {
-		ncpu = othercpus;
-		if (ncpu < 1)
-			return;
-	} else {
-		mask &= ~PCPU_GET(cpumask);
-		if (mask == 0)
-			return;
-		ncpu = bitcount32(mask);
-		if (ncpu > othercpus) {
-			/* XXX this should be a panic offence */
-			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
-			    ncpu, othercpus);
-			ncpu = othercpus;
-		}
-		/* XXX should be a panic, implied by mask == 0 above */
-		if (ncpu < 1)
-			return;
-	}
+#if 0
 	if (!(read_rflags() & PSL_I))
 		panic("%s: interrupts disabled", __func__);
-	mtx_lock_spin(&smp_ipi_mtx);
-	smp_tlb_addr1 = addr1;
-	smp_tlb_addr2 = addr2;
-	atomic_store_rel_int(&smp_tlb_wait, 0);
-	if (mask == (cpumask_t)-1)
-		ipi_all_but_self(vector);
-	else
-		ipi_selected(mask, vector);
-	while (smp_tlb_wait < ncpu)
-		ia32_pause();
-	mtx_unlock_spin(&smp_ipi_mtx);
+#endif
+	params.type = vector;
+	params.addr1 = addr1;
+	params.addr2 = addr2;
+	smp_rendezvous_cpus(mask & all_cpus & ~(1 << curcpu),
+	    smp_no_rendevous_barrier, tlb_shootdown_action,
+	    smp_no_rendevous_barrier, &params);
+}
+
+/*
+ * Flush the TLB on all other CPU's
+ */
+static void
+smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+{
+
+	smp_targeted_tlb_shootdown(all_cpus & ~(1 << curcpu),
+	    vector, addr1, addr2);
 }
 
 /*

Modified: user/avg/xcpu/sys/kern/kern_shutdown.c
==============================================================================
--- user/avg/xcpu/sys/kern/kern_shutdown.c	Mon May  9 07:13:08 2011	(r221687)
+++ user/avg/xcpu/sys/kern/kern_shutdown.c	Mon May  9 07:14:16 2011	(r221688)
@@ -509,26 +509,9 @@ shutdown_reset(void *junk, int howto)
 	printf("Rebooting...\n");
 	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
 
-	/*
-	 * Acquiring smp_ipi_mtx here has a double effect:
-	 * - it disables interrupts avoiding CPU0 preemption
-	 *   by fast handlers (thus deadlocking against other CPUs)
-	 * - it avoids deadlocks against smp_rendezvous() or, more
-	 *   generally, threads busy-waiting, with this spinlock held,
-	 *   and waiting for responses by threads on other CPUs
-	 *   (ie. smp_tlb_shootdown()).
-	 *
-	 * For the !SMP case it just needs to handle the former problem.
-	 */
-#ifdef SMP
-	mtx_lock_spin(&smp_ipi_mtx);
-#else
 	spinlock_enter();
-#endif
-
-	/* cpu_boot(howto); */ /* doesn't do anything at the moment */
 	cpu_reset();
-	/* NOTREACHED */ /* assuming reset worked */
+	/* NOTREACHED */
 }
 
 /*

Modified: user/avg/xcpu/sys/kern/subr_smp.c
==============================================================================
--- user/avg/xcpu/sys/kern/subr_smp.c	Mon May  9 07:13:08 2011	(r221687)
+++ user/avg/xcpu/sys/kern/subr_smp.c	Mon May  9 07:14:16 2011	(r221688)
@@ -101,6 +101,10 @@ SYSCTL_INT(_kern_smp, OID_AUTO, topology
     "Topology override setting; 0 is default provided by hardware.");
 TUNABLE_INT("kern.smp.topology", &smp_topology);
 
+unsigned int coalesced_ipi_count;
+SYSCTL_INT(_kern_smp, OID_AUTO, coalesced_ipi_count, CTLFLAG_RD,
+    &coalesced_ipi_count, 0, "Count of coalesced SMP rendezvous IPIs");
+
 #ifdef SMP
 /* Enable forwarding of a signal to a process running on a different CPU */
 static int forward_signal_enabled = 1;
@@ -109,14 +113,20 @@ SYSCTL_INT(_kern_smp, OID_AUTO, forward_
     "Forwarding of a signal to a process on a different CPU");
 
 /* Variables needed for SMP rendezvous. */
-static volatile int smp_rv_ncpus;
-static void (*volatile smp_rv_setup_func)(void *arg);
-static void (*volatile smp_rv_action_func)(void *arg);
-static void (*volatile smp_rv_teardown_func)(void *arg);
-static void *volatile smp_rv_func_arg;
-static volatile int smp_rv_waiters[3];
+struct smp_rendezvous_data {
+	void (*smp_rv_setup_func)(void *arg);
+	void (*smp_rv_action_func)(void *arg);
+	void (*smp_rv_teardown_func)(void *arg);
+	void *smp_rv_func_arg;
+	volatile int smp_rv_waiters[2];
+	int smp_rv_ncpus;
+};
+
+static DPCPU_DEFINE(struct smp_rendezvous_data, smp_rv_data);
+static volatile DPCPU_DEFINE(cpumask_t, smp_rv_senders);
+static volatile DPCPU_DEFINE(cpumask_t, smp_rv_count);
 
-/* 
+/*
  * Shared mutex to restrict busywaits between smp_rendezvous() and
  * smp(_targeted)_tlb_shootdown().  A deadlock occurs if both of these
  * functions trigger at once and cause multiple CPUs to busywait with
@@ -397,39 +407,44 @@ unstop_cpus_hard(void)
  * Note that the supplied external functions _must_ be reentrant and aware
  * that they are running in parallel and in an unknown lock context.
  */
-void
-smp_rendezvous_action(void)
+static void
+smp_rendezvous_action_body(int cpu)
 {
-	void *local_func_arg = smp_rv_func_arg;
-	void (*local_setup_func)(void*) = smp_rv_setup_func;
-	void (*local_action_func)(void*) = smp_rv_action_func;
-	void (*local_teardown_func)(void*) = smp_rv_teardown_func;
-
-	/* Ensure we have up-to-date values. */
-	atomic_add_acq_int(&smp_rv_waiters[0], 1);
-	while (smp_rv_waiters[0] < smp_rv_ncpus)
-		cpu_spinwait();
+	volatile struct smp_rendezvous_data *rv;
+	void *local_func_arg;
+	void (*local_setup_func)(void*);
+	void (*local_action_func)(void*);
+	void (*local_teardown_func)(void*);
+	int ncpus;
+
+	rv = DPCPU_ID_PTR(cpu, smp_rv_data);
+	local_func_arg = rv->smp_rv_func_arg;
+	local_setup_func = rv->smp_rv_setup_func;
+	local_action_func = rv->smp_rv_action_func;
+	local_teardown_func = rv->smp_rv_teardown_func;
+	ncpus = rv->smp_rv_ncpus;
 
 	/* setup function */
 	if (local_setup_func != smp_no_rendevous_barrier) {
-		if (smp_rv_setup_func != NULL)
-			smp_rv_setup_func(smp_rv_func_arg);
+		if (local_setup_func != NULL)
+			local_setup_func(local_func_arg);
 
 		/* spin on entry rendezvous */
-		atomic_add_int(&smp_rv_waiters[1], 1);
-		while (smp_rv_waiters[1] < smp_rv_ncpus)
-			cpu_spinwait();
+		atomic_add_int(&rv->smp_rv_waiters[0], 1);
+		while (rv->smp_rv_waiters[0] < ncpus)
+			cpu_spinwait();
 	}
 
 	/* action function */
 	if (local_action_func != NULL)
 		local_action_func(local_func_arg);
 
-	/* spin on exit rendezvous */
-	atomic_add_int(&smp_rv_waiters[2], 1);
+	atomic_add_int(&rv->smp_rv_waiters[1], 1);
 	if (local_teardown_func == smp_no_rendevous_barrier)
 		return;
-	while (smp_rv_waiters[2] < smp_rv_ncpus)
+
+	/* spin on exit rendezvous */
+	while (rv->smp_rv_waiters[1] < ncpus)
 		cpu_spinwait();
 
 	/* teardown function */
@@ -438,13 +453,95 @@ smp_rendezvous_action(void)
 }
 
 void
+smp_rendezvous_action(void)
+{
+	cpumask_t mask;
+	int pending;
+	int count;
+	int cpu;
+
+	pending = DPCPU_GET(smp_rv_count);
+	while (pending != 0) {
+		KASSERT(pending > 0, ("negative pending rendezvous count"));
+		mask = DPCPU_GET(smp_rv_senders);
+		if (mask == 0) {
+			cpu_spinwait();
+			continue;
+		}
+
+		atomic_clear_acq_int(DPCPU_PTR(smp_rv_senders), mask);
+		count = 0;
+		do {
+			count++;
+			cpu = ffs(mask) - 1;
+			mask &= ~(1 << cpu);
+			smp_rendezvous_action_body(cpu);
+		} while (mask != 0);
+
+		pending = atomic_fetchadd_int(DPCPU_PTR(smp_rv_count), -count);
+		pending -= count;
+	}
+}
+
+static void
+smp_rendezvous_wait(void)
+{
+	volatile struct smp_rendezvous_data *rv;
+	int ncpus;
+
+	rv = DPCPU_PTR(smp_rv_data);
+	ncpus = rv->smp_rv_ncpus;
+
+	while (atomic_load_acq_int(&rv->smp_rv_waiters[1]) < ncpus) {
+		/* check for incoming events */
+		if ((stopping_cpus & (1 << curcpu)) != 0)
+			cpustop_handler();
+		else if (DPCPU_GET(smp_rv_senders) != 0)
+			smp_rendezvous_action();
+		else
+			cpu_spinwait();
+	}
+}
+
+/*
+ * Execute the action_func on the targeted CPUs.
+ *
+ * setup_func:
+ * - if a function pointer is given, then first execute the function;
+ *   only after the function is executed on all targeted CPUs can they
+ *   proceed to the next step;
+ * - if NULL is given, this is equivalent to specifying a pointer to an
+ *   empty function; as such there is no actual setup function, but all
+ *   targeted CPUs proceed to the next step at about the same time;
+ * - smp_no_rendevous_barrier is a special value that signifies that there
+ *   is no setup function nor should the targeted CPUs wait for anything
+ *   before proceeding to the next step.
+ *
+ * action_func:
+ * - a function to be executed on the targeted CPUs;
+ *   NULL is equivalent to specifying a pointer to an empty function.
+ *
+ * teardown_func:
+ * - if a function pointer is given, then first wait for all targeted CPUs
+ *   to complete execution of action_func, then execute this function;
+ * - if NULL is given, this is equivalent to specifying a pointer to an
+ *   empty function; as such there is no actual teardown action, but all
+ *   targeted CPUs wait for each other to complete execution of action_func;
+ * - smp_no_rendevous_barrier is a special value that signifies that there
+ *   is no teardown function nor should the targeted CPUs wait for anything
+ *   after completing action_func.
+ */
+void
 smp_rendezvous_cpus(cpumask_t map,
 	void (* setup_func)(void *),
 	void (* action_func)(void *),
 	void (* teardown_func)(void *),
 	void *arg)
 {
-	int i, ncpus = 0;
+	volatile struct smp_rendezvous_data *rv;
+	cpumask_t tmp;
+	int ncpus;
+	int cpu;
 
 	if (!smp_started) {
 		if (setup_func != NULL)
@@ -456,39 +553,66 @@ smp_rendezvous_cpus(cpumask_t map,
 		return;
 	}
 
-	CPU_FOREACH(i) {
-		if (((1 << i) & map) != 0)
-			ncpus++;
+	map &= all_cpus;
+	tmp = map;
+	ncpus = 0;
+	while (tmp != 0) {
+		cpu = ffs(tmp) - 1;
+		tmp &= ~(1 << cpu);
+		ncpus++;
 	}
-	if (ncpus == 0)
-		panic("ncpus is 0 with map=0x%x", map);
 
-	/* obtain rendezvous lock */
-	mtx_lock_spin(&smp_ipi_mtx);
+	spinlock_enter();
+
+	/*
+	 * First wait for an event previously posted by us to complete
+	 * (if any); this is done in case the event was asynchronous.
+	 * In the future we could have a queue of outgoing events instead
+	 * of a single item.
+	 */
+	smp_rendezvous_wait();
 
 	/* set static function pointers */
-	smp_rv_ncpus = ncpus;
-	smp_rv_setup_func = setup_func;
-	smp_rv_action_func = action_func;
-	smp_rv_teardown_func = teardown_func;
-	smp_rv_func_arg = arg;
-	smp_rv_waiters[1] = 0;
-	smp_rv_waiters[2] = 0;
-	atomic_store_rel_int(&smp_rv_waiters[0], 0);
+	rv = DPCPU_PTR(smp_rv_data);
+	rv->smp_rv_ncpus = ncpus;
+	rv->smp_rv_setup_func = setup_func;
+	rv->smp_rv_action_func = action_func;
+	rv->smp_rv_teardown_func = teardown_func;
+	rv->smp_rv_func_arg = arg;
+	rv->smp_rv_waiters[1] = 0;
+	atomic_store_rel_int(&rv->smp_rv_waiters[0], 0);
+
+	/* signal other CPUs, which will enter the IPI with interrupts off */
+	tmp = map;
+	while (tmp != 0) {
+		cpu = ffs(tmp) - 1;
+		tmp &= ~(1 << cpu);
+
+		if (cpu == curcpu)
+			continue;
+
+		KASSERT(
+		    (DPCPU_ID_GET(cpu, smp_rv_senders) & (1 << curcpu)) == 0,
+		    ("curcpu bit is set in target cpu's senders map"));
+
+		/* if we are the first to send an event, then send an ipi */
+		if (atomic_fetchadd_int(DPCPU_ID_PTR(cpu, smp_rv_count), 1)
+		    == 0)
+			ipi_cpu(cpu, IPI_RENDEZVOUS);
+		else
+			coalesced_ipi_count++;
 
-	/* signal other processors, which will enter the IPI with interrupts off */
-	ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS);
+		atomic_set_rel_int(DPCPU_ID_PTR(cpu, smp_rv_senders),
+		    1 << curcpu);
+	}
 
 	/* Check if the current CPU is in the map */
 	if ((map & (1 << curcpu)) != 0)
-		smp_rendezvous_action();
-
+		smp_rendezvous_action_body(curcpu);
 	if (teardown_func == smp_no_rendevous_barrier)
-		while (atomic_load_acq_int(&smp_rv_waiters[2]) < ncpus)
-			cpu_spinwait();
+		smp_rendezvous_wait();
 
-	/* release lock */
-	mtx_unlock_spin(&smp_ipi_mtx);
+	spinlock_exit();
 }
 
 void
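As an aside, a hypothetical caller of the reworked API could look like the
sketch below.  It is not part of this commit, it assumes the usual kernel
headers (<sys/smp.h>, <machine/atomic.h>), and the example_* names are made
up; it simply follows the setup/action/teardown contract documented in the
big comment above, running an action on every CPU with no setup or teardown
barrier:

static void
example_action(void *arg)
{

	/* Runs on every targeted CPU, possibly in parallel. */
	atomic_add_int((volatile u_int *)arg, 1);
}

static void
example_broadcast(void)
{
	u_int count = 0;

	/*
	 * NULL setup_func: targeted CPUs sync up before the action;
	 * smp_no_rendevous_barrier teardown: no barrier afterwards.
	 */
	smp_rendezvous_cpus(all_cpus, NULL, example_action,
	    smp_no_rendevous_barrier, &count);
}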