From owner-svn-src-all@FreeBSD.ORG Tue Mar 9 03:30:32 2010 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id AE7DF1065688; Tue, 9 Mar 2010 03:30:32 +0000 (UTC) (envelope-from alc@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 669FD8FC0A; Tue, 9 Mar 2010 03:30:32 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o293UWvR093412; Tue, 9 Mar 2010 03:30:32 GMT (envelope-from alc@svn.freebsd.org) Received: (from alc@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o293UVtp093407; Tue, 9 Mar 2010 03:30:31 GMT (envelope-from alc@svn.freebsd.org) Message-Id: <201003090330.o293UVtp093407@svn.freebsd.org> From: Alan Cox Date: Tue, 9 Mar 2010 03:30:31 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r204907 - in head/sys/amd64: amd64 include X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 09 Mar 2010 03:30:32 -0000 Author: alc Date: Tue Mar 9 03:30:31 2010 New Revision: 204907 URL: http://svn.freebsd.org/changeset/base/204907 Log: Implement AMD's recommended workaround for Erratum 383 on Family 10h processors. With this workaround, superpage promotion can be re-enabled under virtualization. Moreover, machine check exceptions can safely be enabled when FreeBSD is running natively on Family 10h processors. Most of the credit should go to Andriy Gapon for diagnosing the error and working with Borislav Petkov at AMD to document it. Andriy also reviewed and tested my patches. Discussed with: jhb MFC after: 3 weeks Modified: head/sys/amd64/amd64/mca.c head/sys/amd64/amd64/pmap.c head/sys/amd64/include/md_var.h head/sys/amd64/include/specialreg.h Modified: head/sys/amd64/amd64/mca.c ============================================================================== --- head/sys/amd64/amd64/mca.c Tue Mar 9 02:28:20 2010 (r204906) +++ head/sys/amd64/amd64/mca.c Tue Mar 9 03:30:31 2010 (r204907) @@ -65,6 +65,15 @@ TUNABLE_INT("hw.mca.enabled", &mca_enabl SYSCTL_INT(_hw_mca, OID_AUTO, enabled, CTLFLAG_RDTUN, &mca_enabled, 0, "Administrative toggle for machine check support"); +static int amd10h_L1TP = 1; +TUNABLE_INT("hw.mca.amd10h_L1TP", &amd10h_L1TP); +SYSCTL_INT(_hw_mca, OID_AUTO, amd10h_L1TP, CTLFLAG_RDTUN, &amd10h_L1TP, 0, + "Administrative toggle for logging of level one TLB parity (L1TP) errors"); + +int workaround_erratum383; +SYSCTL_INT(_hw_mca, OID_AUTO, erratum383, CTLFLAG_RD, &workaround_erratum383, 0, + "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?"); + static STAILQ_HEAD(, mca_internal) mca_records; static struct callout mca_timer; static int mca_ticks = 3600; /* Check hourly by default. */ @@ -481,7 +490,7 @@ void mca_init(void) { uint64_t mcg_cap; - uint64_t ctl; + uint64_t ctl, mask; int skip; int i; @@ -489,6 +498,15 @@ mca_init(void) if (!mca_enabled || !(cpu_feature & CPUID_MCE)) return; + /* + * On AMD Family 10h processors, unless logging of level one TLB + * parity (L1TP) errors is disabled, enable the recommended workaround + * for Erratum 383. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && + CPUID_TO_FAMILY(cpu_id) == 0x10 && amd10h_L1TP) + workaround_erratum383 = 1; + if (cpu_feature & CPUID_MCA) { if (PCPU_GET(cpuid) == 0) mca_setup(); @@ -499,6 +517,22 @@ mca_init(void) /* Enable MCA features. */ wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); + /* + * Disable logging of level one TLB parity (L1TP) errors by + * the data and instruction caches as an alternative + * workaround for AMD Family 10h Erratum 383. Unlike the + * recommended workaround, there is no performance penalty to + * this workaround. However, L1TP errors will go unreported. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && + CPUID_TO_FAMILY(cpu_id) == 0x10 && !amd10h_L1TP) { + mask = rdmsr(MSR_MC0_CTL_MASK); + if ((mask & (1UL << 5)) == 0) + wrmsr(MSR_MC0_CTL_MASK, mask | (1UL << 5)); + mask = rdmsr(MSR_MC1_CTL_MASK); + if ((mask & (1UL << 5)) == 0) + wrmsr(MSR_MC1_CTL_MASK, mask | (1UL << 5)); + } for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { /* By default enable logging of all errors. */ ctl = 0xffffffffffffffffUL; Modified: head/sys/amd64/amd64/pmap.c ============================================================================== --- head/sys/amd64/amd64/pmap.c Tue Mar 9 02:28:20 2010 (r204906) +++ head/sys/amd64/amd64/pmap.c Tue Mar 9 03:30:31 2010 (r204907) @@ -7,7 +7,7 @@ * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. - * Copyright (c) 2005-2008 Alan L. Cox + * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -255,6 +255,9 @@ static void pmap_remove_entry(struct pma static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); +static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, + pd_entry_t newpde); +static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde); static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); @@ -686,13 +689,13 @@ pmap_init(void) pv_entry_high_water = 9 * (pv_entry_max / 10); /* - * Disable large page mappings by default if the kernel is running in - * a virtual machine on an AMD Family 10h processor. This is a work- - * around for Erratum 383. + * If the kernel is running in a virtual machine on an AMD Family 10h + * processor, then it must assume that MCA is enabled by the virtual + * machine monitor. */ if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x10) - pg_ps_enabled = 0; + workaround_erratum383 = 1; /* * Are large page mappings enabled? @@ -848,6 +851,42 @@ pmap_cache_bits(int mode, boolean_t is_p cache_bits |= PG_NC_PWT; return (cache_bits); } + +/* + * After changing the page size for the specified virtual address in the page + * table, flush the corresponding entries from the processor's TLB. Only the + * calling processor's TLB is affected. + * + * The calling thread must be pinned to a processor. + */ +static void +pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) +{ + u_long cr4; + + if ((newpde & PG_PS) == 0) + /* Demotion: flush a specific 2MB page mapping. */ + invlpg(va); + else if ((newpde & PG_G) == 0) + /* + * Promotion: flush every 4KB page mapping from the TLB + * because there are too many to flush individually. + */ + invltlb(); + else { + /* + * Promotion: flush every 4KB page mapping from the TLB, + * including any global (PG_G) mappings. + */ + cr4 = rcr4(); + load_cr4(cr4 & ~CR4_PGE); + /* + * Although preemption at this point could be detrimental to + * performance, it would not lead to an error. + */ + load_cr4(cr4); + } +} #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. @@ -944,6 +983,69 @@ pmap_invalidate_cache(void) smp_cache_flush(); sched_unpin(); } + +struct pde_action { + cpumask_t store; /* processor that updates the PDE */ + cpumask_t invalidate; /* processors that invalidate their TLB */ + vm_offset_t va; + pd_entry_t *pde; + pd_entry_t newpde; +}; + +static void +pmap_update_pde_action(void *arg) +{ + struct pde_action *act = arg; + + if (act->store == PCPU_GET(cpumask)) + pde_store(act->pde, act->newpde); +} + +static void +pmap_update_pde_teardown(void *arg) +{ + struct pde_action *act = arg; + + if ((act->invalidate & PCPU_GET(cpumask)) != 0) + pmap_update_pde_invalidate(act->va, act->newpde); +} + +/* + * Change the page size for the specified virtual address in a way that + * prevents any possibility of the TLB ever having two entries that map the + * same virtual address using different page sizes. This is the recommended + * workaround for Erratum 383 on AMD Family 10h processors. It prevents a + * machine check exception for a TLB state that is improperly diagnosed as a + * hardware error. + */ +static void +pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) +{ + struct pde_action act; + cpumask_t active, cpumask; + + sched_pin(); + cpumask = PCPU_GET(cpumask); + if (pmap == kernel_pmap) + active = all_cpus; + else + active = pmap->pm_active; + if ((active & PCPU_GET(other_cpus)) != 0) { + act.store = cpumask; + act.invalidate = active; + act.va = va; + act.pde = pde; + act.newpde = newpde; + smp_rendezvous_cpus(cpumask | active, + smp_no_rendevous_barrier, pmap_update_pde_action, + pmap_update_pde_teardown, &act); + } else { + pde_store(pde, newpde); + if ((active & cpumask) != 0) + pmap_update_pde_invalidate(va, newpde); + } + sched_unpin(); +} #else /* !SMP */ /* * Normal, non-SMP, invalidation functions. @@ -981,6 +1083,15 @@ pmap_invalidate_cache(void) wbinvd(); } + +static void +pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) +{ + + pde_store(pde, newpde); + if (pmap == kernel_pmap || pmap->pm_active) + pmap_update_pde_invalidate(va, newpde); +} #endif /* !SMP */ static void @@ -2361,7 +2472,10 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ - pde_store(pde, newpde); + if (workaround_erratum383) + pmap_update_pde(pmap, va, pde, newpde); + else + pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. @@ -2977,7 +3091,10 @@ setpte: /* * Map the superpage. */ - pde_store(pde, PG_PS | newpde); + if (workaround_erratum383) + pmap_update_pde(pmap, va, pde, PG_PS | newpde); + else + pde_store(pde, PG_PS | newpde); pmap_pde_promotions++; CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" Modified: head/sys/amd64/include/md_var.h ============================================================================== --- head/sys/amd64/include/md_var.h Tue Mar 9 02:28:20 2010 (r204906) +++ head/sys/amd64/include/md_var.h Tue Mar 9 03:30:31 2010 (r204907) @@ -61,6 +61,7 @@ extern char sigcode[]; extern int szsigcode; extern uint64_t *vm_page_dump; extern int vm_page_dump_size; +extern int workaround_erratum383; extern int _udatasel; extern int _ucodesel; extern int _ucode32sel; Modified: head/sys/amd64/include/specialreg.h ============================================================================== --- head/sys/amd64/include/specialreg.h Tue Mar 9 02:28:20 2010 (r204906) +++ head/sys/amd64/include/specialreg.h Tue Mar 9 03:30:31 2010 (r204907) @@ -494,6 +494,8 @@ #define MSR_TOP_MEM 0xc001001a /* boundary for ram below 4G */ #define MSR_TOP_MEM2 0xc001001d /* boundary for ram above 4G */ #define MSR_K8_UCODE_UPDATE 0xc0010020 /* update microcode */ +#define MSR_MC0_CTL_MASK 0xc0010044 +#define MSR_MC1_CTL_MASK 0xc0010045 /* VIA ACE crypto featureset: for via_feature_rng */ #define VIA_HAS_RNG 1 /* cpu has RNG */