Date: Fri, 16 Mar 2018 11:56:27 +0200
From: Konstantin Belousov <kostikbel@gmail.com>
To: Peter Lei <peter.lei@ieee.org>
Cc: freebsd-current@freebsd.org, huanghwh@163.com
Subject: Re: amd64: panic on -CURRENT @r330539 for certain UEFI hosts
Message-ID: <20180316095627.GW76926@kib.kiev.ua>
In-Reply-To: <8b8f1352-aa5c-716c-ef6c-3b3cd630043f@ieee.org>
References: <8b8f1352-aa5c-716c-ef6c-3b3cd630043f@ieee.org>
next in thread | previous in thread | raw e-mail | index | archive | help
On Thu, Mar 15, 2018 at 09:38:56PM -0500, Peter Lei wrote:
> Some recent UEFI implementations have begun to leave the CPU with page
> write protection enabled in CR0.
>
> With r330539 which enables kernel page protections, interesting things
> happen during boot (aka panic) when protection is already enabled,
> including a write protection fault from an explicit .text fixup write
> from xsave->xsaveopt by fpuinit().
>
> I see this so far booting -CURRENT under virtual environments:
>
> - QEMU with recent OVMF EDK2 builds: this is certainly due to UEFI
> enabling paging and page protections.
>
> - VMWare Fusion 10.1.x on Mac: no specific insight on what's going on
> inside the implementation, but CR0_WP is definitely left enabled before
> the kernel is booted.
>
> I have patched my kernel build to explicitly clear CR0_WP (e.g. in
> initializecpu) prior to creating the page tables to get around this, but
> someone might have a cleaner/better solution...

Try this.

diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c
index 9dfd44cf82c..1ecec02835c 100644
--- a/sys/amd64/amd64/db_interface.c
+++ b/sys/amd64/amd64/db_interface.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/pcpu.h>
 
 #include <machine/cpufunc.h>
+#include <machine/md_var.h>
 #include <machine/specialreg.h>
 
 #include <ddb/ddb.h>
@@ -75,19 +76,19 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data)
 	jmp_buf jb;
 	void *prev_jb;
 	char *dst;
-	u_long cr0save;
+	bool old_wp;
 	int ret;
 
-	cr0save = rcr0();
+	old_wp = false;
 	prev_jb = kdb_jmpbuf(jb);
 	ret = setjmp(jb);
 	if (ret == 0) {
-		load_cr0(cr0save & ~CR0_WP);
+		old_wp = disable_wp();
 		dst = (char *)addr;
 		while (size-- > 0)
 			*dst++ = *data++;
 	}
-	load_cr0(cr0save);
+	restore_wp(old_wp);
 	(void)kdb_jmpbuf(prev_jb);
 	return (ret);
 }
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 72b10396341..39367fa6ffb 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -205,6 +205,7 @@ fpuinit_bsp1(void)
 {
 	u_int cp[4];
 	uint64_t xsave_mask_user;
+	bool old_wp;
 
 	if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
 		use_xsave = 1;
@@ -233,8 +234,14 @@ fpuinit_bsp1(void)
 		/*
 		 * Patch the XSAVE instruction in the cpu_switch code
 		 * to XSAVEOPT.  We assume that XSAVE encoding used
		 * REX byte, and set the bit 4 of the r/m byte.
+		 *
+		 * It seems that some BIOSes give control to the OS
+		 * with CR0.WP already set, making the kernel text
+		 * read-only before cpu_startup().
 		 */
+		old_wp = disable_wp();
 		ctx_switch_xsave[3] |= 0x10;
+		restore_wp(old_wp);
 	}
 }
diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c
index 68eb6002593..f7ca3c07ea3 100644
--- a/sys/amd64/amd64/gdb_machdep.c
+++ b/sys/amd64/amd64/gdb_machdep.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpufunc.h>
 #include <machine/frame.h>
 #include <machine/gdb_machdep.h>
+#include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
@@ -127,17 +128,14 @@ gdb_cpu_signal(int type, int code)
 
 void *
 gdb_begin_write(void)
 {
-	u_long cr0save;
 
-	cr0save = rcr0();
-	load_cr0(cr0save & ~CR0_WP);
-	return ((void *)cr0save);
+	return (disable_wp() ? &gdb_begin_write : NULL);
 }
 
 void
 gdb_end_write(void *arg)
 {
 
-	load_cr0((u_long)arg);
+	restore_wp(arg != NULL);
 }
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index e340c6cd14d..fcc45eca57d 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -2597,6 +2597,31 @@ clear_pcb_flags(struct pcb *pcb, const u_int flags)
 	    : "cc", "memory");
 }
 
+/*
+ * Enable and restore kernel text write permissions.
+ * Callers must ensure that disable_wp()/restore_wp() are executed
+ * without rescheduling on the same core.
+ */
+bool
+disable_wp(void)
+{
+	u_int cr0;
+
+	cr0 = rcr0();
+	if ((cr0 & CR0_WP) == 0)
+		return (false);
+	load_cr0(cr0 & ~CR0_WP);
+	return (true);
+}
+
+void
+restore_wp(bool old_wp)
+{
+
+	if (old_wp)
+		load_cr0(rcr0() | CR0_WP);
+}
+
 #ifdef KDB
 
 /*
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 63dabaf4047..abcc273b6c6 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -53,6 +53,8 @@ void amd64_conf_fast_syscall(void);
 void	amd64_db_resume_dbreg(void);
 void	amd64_lower_shared_page(struct sysentvec *);
 void	amd64_syscall(struct thread *td, int traced);
+bool	disable_wp(void);
+void	restore_wp(bool old_wp);
 void	doreti_iret(void) __asm(__STRING(doreti_iret));
 void	doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
 void	ld_ds(void) __asm(__STRING(ld_ds));
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20180316095627.GW76926>