Date: Sat, 9 May 2015 20:26:33 +0000 (UTC)
From: Glen Barber <gjb@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r282692 - in projects/release-arm-redux: share/man/man4 sys/amd64/acpica sys/amd64/amd64 sys/amd64/include sys/arm/ti sys/dev/acpica sys/dev/hwpmc sys/dev/iicbus sys/dev/ofw sys/i386/ac...
Message-ID: <201505092026.t49KQXek006345@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: gjb Date: Sat May 9 20:26:32 2015 New Revision: 282692 URL: https://svnweb.freebsd.org/changeset/base/282692 Log: MFH: r282673-r282691 This will be the final commit to this branch, in preparation of merging back to ^/head. Sponsored by: The FreeBSD Foundation Modified: projects/release-arm-redux/share/man/man4/acpi.4 projects/release-arm-redux/sys/amd64/acpica/acpi_machdep.c projects/release-arm-redux/sys/amd64/amd64/apic_vector.S projects/release-arm-redux/sys/amd64/amd64/cpu_switch.S projects/release-arm-redux/sys/amd64/amd64/genassym.c projects/release-arm-redux/sys/amd64/amd64/machdep.c projects/release-arm-redux/sys/amd64/amd64/mp_machdep.c projects/release-arm-redux/sys/amd64/amd64/pmap.c projects/release-arm-redux/sys/amd64/amd64/vm_machdep.c projects/release-arm-redux/sys/amd64/include/cpufunc.h projects/release-arm-redux/sys/amd64/include/md_var.h projects/release-arm-redux/sys/amd64/include/pcpu.h projects/release-arm-redux/sys/amd64/include/pmap.h projects/release-arm-redux/sys/amd64/include/smp.h projects/release-arm-redux/sys/arm/ti/ti_i2c.c projects/release-arm-redux/sys/dev/acpica/acpi_cpu.c projects/release-arm-redux/sys/dev/acpica/acpi_package.c projects/release-arm-redux/sys/dev/acpica/acpivar.h projects/release-arm-redux/sys/dev/hwpmc/hwpmc_armv7.c projects/release-arm-redux/sys/dev/hwpmc/hwpmc_e500.c projects/release-arm-redux/sys/dev/hwpmc/hwpmc_mips74k.c projects/release-arm-redux/sys/dev/iicbus/iicbus.c projects/release-arm-redux/sys/dev/iicbus/iicbus.h projects/release-arm-redux/sys/dev/ofw/ofw_iicbus.c projects/release-arm-redux/sys/i386/acpica/acpi_machdep.c projects/release-arm-redux/sys/i386/include/md_var.h projects/release-arm-redux/sys/kern/kern_malloc.c projects/release-arm-redux/sys/kern/kern_thread.c projects/release-arm-redux/sys/vm/uma.h projects/release-arm-redux/sys/vm/uma_core.c projects/release-arm-redux/sys/vm/vm_pageout.c projects/release-arm-redux/sys/x86/include/acpica_machdep.h (contents, props changed) 
projects/release-arm-redux/sys/x86/include/specialreg.h projects/release-arm-redux/sys/x86/x86/cpu_machdep.c projects/release-arm-redux/sys/x86/xen/xen_apic.c projects/release-arm-redux/usr.sbin/pmcstat/pmcstat_log.c projects/release-arm-redux/usr.sbin/pw/Makefile projects/release-arm-redux/usr.sbin/pw/fileupd.c projects/release-arm-redux/usr.sbin/pw/grupd.c projects/release-arm-redux/usr.sbin/pw/pw_conf.c projects/release-arm-redux/usr.sbin/pw/pw_nis.c projects/release-arm-redux/usr.sbin/pw/pw_user.c Directory Properties: projects/release-arm-redux/ (props changed) projects/release-arm-redux/share/ (props changed) projects/release-arm-redux/share/man/man4/ (props changed) projects/release-arm-redux/sys/ (props changed) Modified: projects/release-arm-redux/share/man/man4/acpi.4 ============================================================================== --- projects/release-arm-redux/share/man/man4/acpi.4 Sat May 9 20:23:37 2015 (r282691) +++ projects/release-arm-redux/share/man/man4/acpi.4 Sat May 9 20:26:32 2015 (r282692) @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd June 23, 2014 +.Dd May 9, 2015 .Dt ACPI 4 .Os .Sh NAME @@ -69,14 +69,12 @@ them (such as Enable dumping Debug objects without .Cd "options ACPI_DEBUG" . Default is 0, ignore Debug objects. -.It Va hw.acpi.acline -AC line state (1 means online, 0 means on battery power). -.It Va hw.acpi.cpu.cx_usage +.It Va dev.cpu.N.cx_usage Debugging information listing the percent of total usage for each sleep state. The values are reset when -.Va hw.acpi.cpu.cx_lowest +.Va dev.cpu.N.cx_lowest is modified. -.It Va hw.acpi.cpu.cx_lowest +.It Va dev.cpu.N.cx_lowest Lowest Cx state to use for idling the CPU. A scheduling algorithm will select states between .Li C1 @@ -111,6 +109,11 @@ semantics as the state. Deeper sleeps provide more power savings but increased transition latency when an interrupt occurs. 
+.It Va dev.cpu.N.cx_method +List of supported CPU idle states and their transition methods, as +directed by the firmware. +.It Va hw.acpi.acline +AC line state (1 means online, 0 means on battery power). .It Va hw.acpi.disable_on_reboot Disable ACPI during the reboot process. Most systems reboot fine with ACPI still enabled, but some require @@ -374,6 +377,14 @@ typically as a child of a PCI bus. .Pq Vt device Supports an ACPI laptop lid switch, which typically puts a system to sleep. +.It Li mwait +.Pq Vt feature +Do not ask firmware for available x86-vendor specific methods to enter +.Li Cx +sleep states. +Only query and use the generic I/O-based entrance method. +The knob is provided to work around inconsistencies in the tables +filled by firmware. .It Li quirks .Pq Vt feature Do not honor quirks. Modified: projects/release-arm-redux/sys/amd64/acpica/acpi_machdep.c ============================================================================== --- projects/release-arm-redux/sys/amd64/acpica/acpi_machdep.c Sat May 9 20:23:37 2015 (r282691) +++ projects/release-arm-redux/sys/amd64/acpica/acpi_machdep.c Sat May 9 20:26:32 2015 (r282692) @@ -87,13 +87,6 @@ acpi_machdep_quirks(int *quirks) return (0); } -void -acpi_cpu_c1() -{ - - __asm __volatile("sti; hlt"); -} - /* * Support for mapping ACPI tables during early boot. Currently this * uses the crashdump map to map each table. 
However, the crashdump Modified: projects/release-arm-redux/sys/amd64/amd64/apic_vector.S ============================================================================== --- projects/release-arm-redux/sys/amd64/amd64/apic_vector.S Sat May 9 20:23:37 2015 (r282691) +++ projects/release-arm-redux/sys/amd64/amd64/apic_vector.S Sat May 9 20:26:32 2015 (r282692) @@ -196,8 +196,6 @@ IDTVEC(hv_vmbus_callback) */ .text -#define NAKE_INTR_CS 24 - SUPERALIGN_TEXT invltlb_ret: call as_lapic_eoi @@ -205,30 +203,28 @@ invltlb_ret: jmp doreti_iret SUPERALIGN_TEXT +IDTVEC(invltlb) + PUSH_FRAME + + call invltlb_handler + jmp invltlb_ret + IDTVEC(invltlb_pcid) PUSH_FRAME call invltlb_pcid_handler jmp invltlb_ret - - SUPERALIGN_TEXT -IDTVEC(invltlb) +IDTVEC(invltlb_invpcid) PUSH_FRAME - call invltlb_handler + call invltlb_invpcid_handler jmp invltlb_ret /* * Single page TLB shootdown */ .text - SUPERALIGN_TEXT -IDTVEC(invlpg_pcid) - PUSH_FRAME - - call invlpg_pcid_handler - jmp invltlb_ret SUPERALIGN_TEXT IDTVEC(invlpg) Modified: projects/release-arm-redux/sys/amd64/amd64/cpu_switch.S ============================================================================== --- projects/release-arm-redux/sys/amd64/amd64/cpu_switch.S Sat May 9 20:23:37 2015 (r282691) +++ projects/release-arm-redux/sys/amd64/amd64/cpu_switch.S Sat May 9 20:26:32 2015 (r282692) @@ -69,16 +69,10 @@ * %rsi = newtd */ ENTRY(cpu_throw) - movl PCPU(CPUID),%eax - testq %rdi,%rdi - jz 1f - /* release bit from old pm_active */ - movq PCPU(CURPMAP),%rdx - LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */ -1: - movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */ - movq PCB_CR3(%r8),%rcx /* new address space */ - jmp swact + movq %rsi,%r12 + movq %rsi,%rdi + call pmap_activate_sw + jmp sw1 END(cpu_throw) /* @@ -132,59 +126,20 @@ ctx_switch_xsave: xorl %eax,%eax movq %rax,PCPU(FPCURTHREAD) 3: - /* Save is done. Now fire up new thread. Leave old vmspace. 
*/ - movq TD_PCB(%rsi),%r8 - - /* switch address space */ - movq PCB_CR3(%r8),%rcx - movq %cr3,%rax - cmpq %rcx,%rax /* Same address space? */ - jne swinact - SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */ - jmp sw1 -swinact: - movl PCPU(CPUID),%eax - /* Release bit from old pmap->pm_active */ - movq PCPU(CURPMAP),%r12 - LK btrl %eax,PM_ACTIVE(%r12) /* clear old */ - SETLK %rdx,TD_LOCK(%rdi) /* Release the old thread */ -swact: - /* Set bit in new pmap->pm_active */ - movq TD_PROC(%rsi),%rdx /* newproc */ - movq P_VMSPACE(%rdx), %rdx - addq $VM_PMAP,%rdx - cmpl $-1,PM_PCID(%rdx) - je 1f - LK btsl %eax,PM_SAVE(%rdx) - jnc 1f - btsq $63,%rcx /* CR3_PCID_SAVE */ - incq PCPU(PM_SAVE_CNT) -1: - movq %rcx,%cr3 /* new address space */ - LK btsl %eax,PM_ACTIVE(%rdx) /* set new */ - movq %rdx,PCPU(CURPMAP) - - /* - * We might lose the race and other CPU might have changed - * the pmap after we set our bit in pmap->pm_save. Recheck. - * Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was - * modified, causing TLB flush for this pcid. 
- */ - btrq $63,%rcx - jnc 1f - LK btsl %eax,PM_SAVE(%rdx) - jc 1f - decq PCPU(PM_SAVE_CNT) - movq %rcx,%cr3 -1: - + movq %rsi,%r12 + movq %rdi,%r13 + movq %rdx,%r15 + movq %rsi,%rdi + callq pmap_activate_sw + SETLK %r15,TD_LOCK(%r13) /* Release the old thread */ sw1: + movq TD_PCB(%r12),%r8 #if defined(SCHED_ULE) && defined(SMP) /* Wait for the new thread to become unblocked */ movq $blocked_lock, %rdx 1: - movq TD_LOCK(%rsi),%rcx + movq TD_LOCK(%r12),%rcx cmpq %rcx, %rdx pause je 1b @@ -195,13 +150,13 @@ sw1: */ /* Skip loading user fsbase/gsbase for kthreads */ - testl $TDP_KTHREAD,TD_PFLAGS(%rsi) + testl $TDP_KTHREAD,TD_PFLAGS(%r12) jnz do_kthread /* * Load ldt register */ - movq TD_PROC(%rsi),%rcx + movq TD_PROC(%r12),%rcx cmpq $0, P_MD+MD_LDT(%rcx) jne do_ldt xorl %eax,%eax @@ -238,7 +193,7 @@ done_tss: movq %r8,PCPU(CURPCB) /* Update the TSS_RSP0 pointer for the next interrupt */ movq %r8,COMMON_TSS_RSP0(%rdx) - movq %rsi,PCPU(CURTHREAD) /* into next thread */ + movq %r12,PCPU(CURTHREAD) /* into next thread */ /* Test if debug registers should be restored. 
*/ testl $PCB_DBREGS,PCB_FLAGS(%r8) Modified: projects/release-arm-redux/sys/amd64/amd64/genassym.c ============================================================================== --- projects/release-arm-redux/sys/amd64/amd64/genassym.c Sat May 9 20:23:37 2015 (r282691) +++ projects/release-arm-redux/sys/amd64/amd64/genassym.c Sat May 9 20:26:32 2015 (r282692) @@ -71,8 +71,6 @@ __FBSDID("$FreeBSD$"); ASSYM(P_VMSPACE, offsetof(struct proc, p_vmspace)); ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); -ASSYM(PM_SAVE, offsetof(struct pmap, pm_save)); -ASSYM(PM_PCID, offsetof(struct pmap, pm_pcid)); ASSYM(P_MD, offsetof(struct proc, p_md)); ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); Modified: projects/release-arm-redux/sys/amd64/amd64/machdep.c ============================================================================== --- projects/release-arm-redux/sys/amd64/amd64/machdep.c Sat May 9 20:23:37 2015 (r282691) +++ projects/release-arm-redux/sys/amd64/amd64/machdep.c Sat May 9 20:26:32 2015 (r282692) @@ -1718,7 +1718,6 @@ hammer_time(u_int64_t modulep, u_int64_t /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; - thread0.td_pcb->pcb_cr3 = KPML4phys; /* PCID 0 is reserved for kernel */ thread0.td_frame = &proc0_tf; env = kern_getenv("kernelname"); Modified: projects/release-arm-redux/sys/amd64/amd64/mp_machdep.c ============================================================================== --- projects/release-arm-redux/sys/amd64/amd64/mp_machdep.c Sat May 9 20:23:37 2015 (r282691) +++ projects/release-arm-redux/sys/amd64/amd64/mp_machdep.c Sat May 9 20:26:32 2015 (r282692) @@ -88,12 +88,9 @@ char *doublefault_stack; char *nmi_stack; /* Variables needed for SMP tlb shootdown. 
*/ -vm_offset_t smp_tlb_addr2; -struct invpcid_descr smp_tlb_invpcid; +static vm_offset_t smp_tlb_addr1, smp_tlb_addr2; +static pmap_t smp_tlb_pmap; volatile int smp_tlb_wait; -uint64_t pcid_cr3; -pmap_t smp_tlb_pmap; -extern int invpcid_works; extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); @@ -139,14 +136,17 @@ cpu_mp_start(void) /* Install an inter-CPU IPI for TLB invalidation */ if (pmap_pcid_enabled) { - setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT, - SEL_KPL, 0); - setidt(IPI_INVLPG, IDTVEC(invlpg_pcid), SDT_SYSIGT, - SEL_KPL, 0); + if (invpcid_works) { + setidt(IPI_INVLTLB, IDTVEC(invltlb_invpcid), + SDT_SYSIGT, SEL_KPL, 0); + } else { + setidt(IPI_INVLTLB, IDTVEC(invltlb_pcid), SDT_SYSIGT, + SEL_KPL, 0); + } } else { setidt(IPI_INVLTLB, IDTVEC(invltlb), SDT_SYSIGT, SEL_KPL, 0); - setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); } + setidt(IPI_INVLPG, IDTVEC(invlpg), SDT_SYSIGT, SEL_KPL, 0); setidt(IPI_INVLRNG, IDTVEC(invlrng), SDT_SYSIGT, SEL_KPL, 0); /* Install an inter-CPU IPI for cache invalidation. */ @@ -242,6 +242,9 @@ init_secondary(void) pc->pc_gs32p = &gdt[NGDT * cpu + GUGS32_SEL]; pc->pc_ldt = (struct system_segment_descriptor *)&gdt[NGDT * cpu + GUSERLDT_SEL]; + pc->pc_curpmap = kernel_pmap; + pc->pc_pcid_gen = 1; + pc->pc_pcid_next = PMAP_PCID_KERN + 1; /* Save the per-cpu pointer for use by the NMI handler. 
*/ np->np_pcpu = (register_t) pc; @@ -407,35 +410,8 @@ start_ap(int apic_id) } /* - * Flush the TLB on all other CPU's + * Flush the TLB on other CPU's */ -static void -smp_tlb_shootdown(u_int vector, pmap_t pmap, vm_offset_t addr1, - vm_offset_t addr2) -{ - u_int ncpu; - - ncpu = mp_ncpus - 1; /* does not shootdown self */ - if (ncpu < 1) - return; /* no other cpus */ - if (!(read_rflags() & PSL_I)) - panic("%s: interrupts disabled", __func__); - mtx_lock_spin(&smp_ipi_mtx); - smp_tlb_invpcid.addr = addr1; - if (pmap == NULL) { - smp_tlb_invpcid.pcid = 0; - } else { - smp_tlb_invpcid.pcid = pmap->pm_pcid; - pcid_cr3 = pmap->pm_cr3; - } - smp_tlb_addr2 = addr2; - smp_tlb_pmap = pmap; - atomic_store_rel_int(&smp_tlb_wait, 0); - ipi_all_but_self(vector); - while (smp_tlb_wait < ncpu) - ia32_pause(); - mtx_unlock_spin(&smp_ipi_mtx); -} static void smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap, @@ -443,7 +419,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask { int cpu, ncpu, othercpus; - othercpus = mp_ncpus - 1; + othercpus = mp_ncpus - 1; /* does not shootdown self */ + + /* + * Check for other cpus. Return if none. 
+ */ if (CPU_ISFULLSET(&mask)) { if (othercpus < 1) return; @@ -452,16 +432,11 @@ smp_targeted_tlb_shootdown(cpuset_t mask if (CPU_EMPTY(&mask)) return; } + if (!(read_rflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); - smp_tlb_invpcid.addr = addr1; - if (pmap == NULL) { - smp_tlb_invpcid.pcid = 0; - } else { - smp_tlb_invpcid.pcid = pmap->pm_pcid; - pcid_cr3 = pmap->pm_cr3; - } + smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; smp_tlb_pmap = pmap; atomic_store_rel_int(&smp_tlb_wait, 0); @@ -485,65 +460,39 @@ smp_targeted_tlb_shootdown(cpuset_t mask } void -smp_invlpg(pmap_t pmap, vm_offset_t addr) -{ - - if (smp_started) { - smp_tlb_shootdown(IPI_INVLPG, pmap, addr, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_page++; -#endif - } -} - -void -smp_invlpg_range(pmap_t pmap, vm_offset_t addr1, vm_offset_t addr2) -{ - - if (smp_started) { - smp_tlb_shootdown(IPI_INVLRNG, pmap, addr1, addr2); -#ifdef COUNT_XINVLTLB_HITS - ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; -#endif - } -} - -void smp_masked_invltlb(cpuset_t mask, pmap_t pmap) { if (smp_started) { smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0); #ifdef COUNT_XINVLTLB_HITS - ipi_masked_global++; + ipi_global++; #endif } } void -smp_masked_invlpg(cpuset_t mask, pmap_t pmap, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0); + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, NULL, addr, 0); #ifdef COUNT_XINVLTLB_HITS - ipi_masked_page++; + ipi_page++; #endif } } void -smp_masked_invlpg_range(cpuset_t mask, pmap_t pmap, vm_offset_t addr1, - vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { - smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, - addr2); + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, NULL, + addr1, addr2); #ifdef COUNT_XINVLTLB_HITS - ipi_masked_range++; - 
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; #endif } } @@ -552,19 +501,9 @@ void smp_cache_flush(void) { - if (smp_started) - smp_tlb_shootdown(IPI_INVLCACHE, NULL, 0, 0); -} - -void -smp_invltlb(pmap_t pmap) -{ - if (smp_started) { - smp_tlb_shootdown(IPI_INVLTLB, pmap, 0, 0); -#ifdef COUNT_XINVLTLB_HITS - ipi_global++; -#endif + smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, + 0, 0); } } @@ -586,10 +525,10 @@ invltlb_handler(void) } void -invltlb_pcid_handler(void) +invltlb_invpcid_handler(void) { - uint64_t cr3; - u_int cpuid; + struct invpcid_descr d; + #ifdef COUNT_XINVLTLB_HITS xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ @@ -597,49 +536,45 @@ invltlb_pcid_handler(void) (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - if (smp_tlb_invpcid.pcid != (uint64_t)-1 && - smp_tlb_invpcid.pcid != 0) { - if (invpcid_works) { - invpcid(&smp_tlb_invpcid, INVPCID_CTX); - } else { - /* Otherwise reload %cr3 twice. */ - cr3 = rcr3(); - if (cr3 != pcid_cr3) { - load_cr3(pcid_cr3); - cr3 |= CR3_PCID_SAVE; - } - load_cr3(cr3); - } - } else { - invltlb_globpcid(); - } - if (smp_tlb_pmap != NULL) { - cpuid = PCPU_GET(cpuid); - if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active)) - CPU_CLR_ATOMIC(cpuid, &smp_tlb_pmap->pm_save); - } - + d.pcid = smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid; + d.pad = 0; + d.addr = 0; + invpcid(&d, smp_tlb_pmap == kernel_pmap ? 
INVPCID_CTXGLOB : + INVPCID_CTX); atomic_add_int(&smp_tlb_wait, 1); } void -invlpg_handler(void) +invltlb_pcid_handler(void) { #ifdef COUNT_XINVLTLB_HITS - xhits_pg[PCPU_GET(cpuid)]++; + xhits_gbl[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ #ifdef COUNT_IPIS - (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; + (*ipi_invltlb_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - invlpg(smp_tlb_invpcid.addr); + if (smp_tlb_pmap == kernel_pmap) { + invltlb_globpcid(); + } else { + /* + * The current pmap might not be equal to + * smp_tlb_pmap. The clearing of the pm_gen in + * pmap_invalidate_all() takes care of TLB + * invalidation when switching to the pmap on this + * CPU. + */ + if (PCPU_GET(curpmap) == smp_tlb_pmap) { + load_cr3(smp_tlb_pmap->pm_cr3 | + smp_tlb_pmap->pm_pcids[PCPU_GET(cpuid)].pm_pcid); + } + } atomic_add_int(&smp_tlb_wait, 1); } void -invlpg_pcid_handler(void) +invlpg_handler(void) { - uint64_t cr3; #ifdef COUNT_XINVLTLB_HITS xhits_pg[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ @@ -647,45 +582,15 @@ invlpg_pcid_handler(void) (*ipi_invlpg_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - if (smp_tlb_invpcid.pcid == (uint64_t)-1) { - invltlb_globpcid(); - } else if (smp_tlb_invpcid.pcid == 0) { - invlpg(smp_tlb_invpcid.addr); - } else if (invpcid_works) { - invpcid(&smp_tlb_invpcid, INVPCID_ADDR); - } else { - /* - * PCID supported, but INVPCID is not. - * Temporarily switch to the target address - * space and do INVLPG. 
- */ - cr3 = rcr3(); - if (cr3 != pcid_cr3) - load_cr3(pcid_cr3 | CR3_PCID_SAVE); - invlpg(smp_tlb_invpcid.addr); - load_cr3(cr3 | CR3_PCID_SAVE); - } - + invlpg(smp_tlb_addr1); atomic_add_int(&smp_tlb_wait, 1); } -static inline void -invlpg_range(vm_offset_t start, vm_offset_t end) -{ - - do { - invlpg(start); - start += PAGE_SIZE; - } while (start < end); -} - void invlrng_handler(void) { - struct invpcid_descr d; vm_offset_t addr; - uint64_t cr3; - u_int cpuid; + #ifdef COUNT_XINVLTLB_HITS xhits_rng[PCPU_GET(cpuid)]++; #endif /* COUNT_XINVLTLB_HITS */ @@ -693,38 +598,11 @@ invlrng_handler(void) (*ipi_invlrng_counts[PCPU_GET(cpuid)])++; #endif /* COUNT_IPIS */ - addr = smp_tlb_invpcid.addr; - if (pmap_pcid_enabled) { - if (smp_tlb_invpcid.pcid == 0) { - /* - * kernel pmap - use invlpg to invalidate - * global mapping. - */ - invlpg_range(addr, smp_tlb_addr2); - } else if (smp_tlb_invpcid.pcid == (uint64_t)-1) { - invltlb_globpcid(); - if (smp_tlb_pmap != NULL) { - cpuid = PCPU_GET(cpuid); - if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active)) - CPU_CLR_ATOMIC(cpuid, - &smp_tlb_pmap->pm_save); - } - } else if (invpcid_works) { - d = smp_tlb_invpcid; - do { - invpcid(&d, INVPCID_ADDR); - d.addr += PAGE_SIZE; - } while (d.addr <= smp_tlb_addr2); - } else { - cr3 = rcr3(); - if (cr3 != pcid_cr3) - load_cr3(pcid_cr3 | CR3_PCID_SAVE); - invlpg_range(addr, smp_tlb_addr2); - load_cr3(cr3 | CR3_PCID_SAVE); - } - } else { - invlpg_range(addr, smp_tlb_addr2); - } + addr = smp_tlb_addr1; + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < smp_tlb_addr2); atomic_add_int(&smp_tlb_wait, 1); } Modified: projects/release-arm-redux/sys/amd64/amd64/pmap.c ============================================================================== --- projects/release-arm-redux/sys/amd64/amd64/pmap.c Sat May 9 20:23:37 2015 (r282691) +++ projects/release-arm-redux/sys/amd64/amd64/pmap.c Sat May 9 20:26:32 2015 (r282692) @@ -273,6 +273,8 @@ pmap_modified_bit(pmap_t pmap) return (mask); } 
+extern struct pcpu __pcpu[]; + #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline @@ -379,8 +381,6 @@ caddr_t CADDR1 = 0; static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */ -static struct unrhdr pcid_unr; -static struct mtx pcid_mtx; int pmap_pcid_enabled = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?"); @@ -827,6 +827,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) { vm_offset_t va; pt_entry_t *pte; + int i; /* * Create an initial set of page tables to run the kernel in. @@ -861,7 +862,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr) kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_cr3 = KPML4phys; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ - CPU_FILL(&kernel_pmap->pm_save); /* always superset of pm_active */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); kernel_pmap->pm_flags = pmap_flags; @@ -895,18 +895,28 @@ pmap_bootstrap(vm_paddr_t *firstaddr) /* Initialize TLB Context Id. */ TUNABLE_INT_FETCH("vm.pmap.pcid_enabled", &pmap_pcid_enabled); if ((cpu_feature2 & CPUID2_PCID) != 0 && pmap_pcid_enabled) { - load_cr4(rcr4() | CR4_PCIDE); - mtx_init(&pcid_mtx, "pcid", NULL, MTX_DEF); - init_unrhdr(&pcid_unr, 1, (1 << 12) - 1, &pcid_mtx); /* Check for INVPCID support */ invpcid_works = (cpu_stdext_feature & CPUID_STDEXT_INVPCID) != 0; - kernel_pmap->pm_pcid = 0; -#ifndef SMP + for (i = 0; i < MAXCPU; i++) { + kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN; + kernel_pmap->pm_pcids[i].pm_gen = 1; + } + __pcpu[0].pc_pcid_next = PMAP_PCID_KERN + 1; + __pcpu[0].pc_pcid_gen = 1; + /* + * pcpu area for APs is zeroed during AP startup. + * pc_pcid_next and pc_pcid_gen are initialized by AP + * during pcpu setup. 
+ */ +#ifdef SMP + load_cr4(rcr4() | CR4_PCIDE); +#else pmap_pcid_enabled = 0; #endif - } else + } else { pmap_pcid_enabled = 0; + } } /* @@ -1277,28 +1287,6 @@ pmap_update_pde_invalidate(pmap_t pmap, } #ifdef SMP -static void -pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va) -{ - struct invpcid_descr d; - uint64_t cr3; - - if (invpcid_works) { - d.pcid = pmap->pm_pcid; - d.pad = 0; - d.addr = va; - invpcid(&d, INVPCID_ADDR); - return; - } - - cr3 = rcr3(); - critical_enter(); - load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE); - invlpg(va); - load_cr3(cr3 | CR3_PCID_SAVE); - critical_exit(); -} - /* * For SMP, these functions have to use the IPI mechanism for coherence. * @@ -1361,8 +1349,8 @@ pmap_invalidate_ept(pmap_t pmap) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpuset_t other_cpus; - u_int cpuid; + cpuset_t *mask; + u_int cpuid, i; if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); @@ -1373,74 +1361,33 @@ pmap_invalidate_page(pmap_t pmap, vm_off ("pmap_invalidate_page: invalid type %d", pmap->pm_type)); sched_pin(); - if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { - if (!pmap_pcid_enabled) { - invlpg(va); - } else { - if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) { - if (pmap == PCPU_GET(curpmap)) - invlpg(va); - else - pmap_invalidate_page_pcid(pmap, va); - } else { - invltlb_globpcid(); - } - } - smp_invlpg(pmap, va); + if (pmap == kernel_pmap) { + invlpg(va); + mask = &all_cpus; } else { cpuid = PCPU_GET(cpuid); - other_cpus = all_cpus; - CPU_CLR(cpuid, &other_cpus); - if (CPU_ISSET(cpuid, &pmap->pm_active)) + if (pmap == PCPU_GET(curpmap)) invlpg(va); - else if (pmap_pcid_enabled) { - if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) - pmap_invalidate_page_pcid(pmap, va); - else - invltlb_globpcid(); + else if (pmap_pcid_enabled) + pmap->pm_pcids[cpuid].pm_gen = 0; + if (pmap_pcid_enabled) { + CPU_FOREACH(i) { + if (cpuid != i) + pmap->pm_pcids[i].pm_gen = 0; + } } - if (pmap_pcid_enabled) - CPU_AND(&other_cpus, 
&pmap->pm_save); - else - CPU_AND(&other_cpus, &pmap->pm_active); - if (!CPU_EMPTY(&other_cpus)) - smp_masked_invlpg(other_cpus, pmap, va); + mask = &pmap->pm_active; } + smp_masked_invlpg(*mask, va); sched_unpin(); } -static void -pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) -{ - struct invpcid_descr d; - uint64_t cr3; - vm_offset_t addr; - - if (invpcid_works) { - d.pcid = pmap->pm_pcid; - d.pad = 0; - for (addr = sva; addr < eva; addr += PAGE_SIZE) { - d.addr = addr; - invpcid(&d, INVPCID_ADDR); - } - return; - } - - cr3 = rcr3(); - critical_enter(); - load_cr3(pmap->pm_cr3 | CR3_PCID_SAVE); - for (addr = sva; addr < eva; addr += PAGE_SIZE) - invlpg(addr); - load_cr3(cr3 | CR3_PCID_SAVE); - critical_exit(); -} - void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpuset_t other_cpus; + cpuset_t *mask; vm_offset_t addr; - u_int cpuid; + u_int cpuid, i; if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); @@ -1451,55 +1398,36 @@ pmap_invalidate_range(pmap_t pmap, vm_of ("pmap_invalidate_range: invalid type %d", pmap->pm_type)); sched_pin(); - if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { - if (!pmap_pcid_enabled) { - for (addr = sva; addr < eva; addr += PAGE_SIZE) - invlpg(addr); - } else { - if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) { - if (pmap == PCPU_GET(curpmap)) { - for (addr = sva; addr < eva; - addr += PAGE_SIZE) - invlpg(addr); - } else { - pmap_invalidate_range_pcid(pmap, - sva, eva); - } - } else { - invltlb_globpcid(); - } - } - smp_invlpg_range(pmap, sva, eva); + cpuid = PCPU_GET(cpuid); + if (pmap == kernel_pmap) { + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + mask = &all_cpus; } else { - cpuid = PCPU_GET(cpuid); - other_cpus = all_cpus; - CPU_CLR(cpuid, &other_cpus); - if (CPU_ISSET(cpuid, &pmap->pm_active)) { + if (pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } else if (pmap_pcid_enabled) { - if 
(pmap->pm_pcid != -1 && pmap->pm_pcid != 0) - pmap_invalidate_range_pcid(pmap, sva, eva); - else - invltlb_globpcid(); + pmap->pm_pcids[cpuid].pm_gen = 0; } - if (pmap_pcid_enabled) - CPU_AND(&other_cpus, &pmap->pm_save); - else - CPU_AND(&other_cpus, &pmap->pm_active); - if (!CPU_EMPTY(&other_cpus)) - smp_masked_invlpg_range(other_cpus, pmap, sva, eva); + if (pmap_pcid_enabled) { + CPU_FOREACH(i) { + if (cpuid != i) + pmap->pm_pcids[i].pm_gen = 0; + } + } + mask = &pmap->pm_active; } + smp_masked_invlpg_range(*mask, sva, eva); sched_unpin(); } void pmap_invalidate_all(pmap_t pmap) { - cpuset_t other_cpus; + cpuset_t *mask; struct invpcid_descr d; - uint64_t cr3; - u_int cpuid; + u_int cpuid, i; if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); @@ -1510,60 +1438,42 @@ pmap_invalidate_all(pmap_t pmap) ("pmap_invalidate_all: invalid type %d", pmap->pm_type)); sched_pin(); - cpuid = PCPU_GET(cpuid); - if (pmap == kernel_pmap || - (pmap_pcid_enabled && !CPU_CMP(&pmap->pm_save, &all_cpus)) || - !CPU_CMP(&pmap->pm_active, &all_cpus)) { - if (invpcid_works) { + if (pmap == kernel_pmap) { + if (pmap_pcid_enabled && invpcid_works) { bzero(&d, sizeof(d)); invpcid(&d, INVPCID_CTXGLOB); } else { invltlb_globpcid(); } - if (!CPU_ISSET(cpuid, &pmap->pm_active)) - CPU_CLR_ATOMIC(cpuid, &pmap->pm_save); - smp_invltlb(pmap); + mask = &all_cpus; } else { - other_cpus = all_cpus; - CPU_CLR(cpuid, &other_cpus); - - /* - * This logic is duplicated in the Xinvltlb shootdown - * IPI handler. - */ - if (pmap_pcid_enabled) { - if (pmap->pm_pcid != -1 && pmap->pm_pcid != 0) { + cpuid = PCPU_GET(cpuid); + if (pmap == PCPU_GET(curpmap)) { + if (pmap_pcid_enabled) { if (invpcid_works) { - d.pcid = pmap->pm_pcid; + d.pcid = pmap->pm_pcids[cpuid].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); } else { - cr3 = rcr3(); - critical_enter(); - - /* - * Bit 63 is clear, pcid TLB - * entries are invalidated. 
- */ - load_cr3(pmap->pm_cr3); - load_cr3(cr3 | CR3_PCID_SAVE); - critical_exit(); + load_cr3(pmap->pm_cr3 | pmap->pm_pcids + [PCPU_GET(cpuid)].pm_pcid); } } else { - invltlb_globpcid(); + invltlb(); } - } else if (CPU_ISSET(cpuid, &pmap->pm_active)) - invltlb(); - if (!CPU_ISSET(cpuid, &pmap->pm_active)) - CPU_CLR_ATOMIC(cpuid, &pmap->pm_save); - if (pmap_pcid_enabled) - CPU_AND(&other_cpus, &pmap->pm_save); - else - CPU_AND(&other_cpus, &pmap->pm_active); - if (!CPU_EMPTY(&other_cpus)) - smp_masked_invltlb(other_cpus, pmap); + } else if (pmap_pcid_enabled) { + pmap->pm_pcids[cpuid].pm_gen = 0; + } + if (pmap_pcid_enabled) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201505092026.t49KQXek006345>