Date:       Tue, 6 Mar 2018 14:28:38 +0000 (UTC)
From:       "Jonathan T. Looney" <jtl@FreeBSD.org>
To:         src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:    svn commit: r330539 - in head/sys: amd64/amd64 amd64/include arm/include conf gdb i386/include mips/include powerpc/include sparc64/include
Message-ID: <201803061428.w26EScwJ020926@repo.freebsd.org>

Author: jtl
Date: Tue Mar 6 14:28:37 2018
New Revision: 330539
URL: https://svnweb.freebsd.org/changeset/base/330539

Log:
  amd64: Protect the kernel text, data, and BSS by setting the RW/NX bits
  correctly for the data contained on each memory page.

  There are several components to this change:

  * Add a variable to indicate the start of the R/W portion of the
    initial memory.

  * Stop detecting NX bit support for each AP.  Instead, use the value
    from the BSP and, if supported, activate the feature on the other
    APs just before loading the correct page table.  (Functionally, we
    already assume that the BSP and all APs have the same support or
    lack of support for the NX bit.)

  * Set the RW and NX bits correctly for the kernel text, data, and BSS
    (subject to some caveats below).

  * Ensure DDB can write to memory when necessary (such as to set a
    breakpoint).

  * Ensure GDB can write to memory when necessary (such as to set a
    breakpoint).  For this purpose, add new MD functions
    gdb_begin_write() and gdb_end_write() which the GDB support code can
    call before and after writing to memory.

  This change is not comprehensive:

  * It doesn't do anything to protect modules.

  * It doesn't do anything for kernel memory allocated after the kernel
    starts running.

  * In order to avoid excessive memory inefficiency, it may let multiple
    types of data share a 2M page, and it gives such a page the most
    permissive settings needed by any of the data on it.

  Reviewed by:	jhb, kib
  Discussed with:	emaste
  MFC after:	2 weeks
  Sponsored by:	Netflix
  Differential Revision:	https://reviews.freebsd.org/D14282

Modified:
  head/sys/amd64/amd64/db_interface.c
  head/sys/amd64/amd64/gdb_machdep.c
  head/sys/amd64/amd64/initcpu.c
  head/sys/amd64/amd64/mpboot.S
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/cpu.h
  head/sys/amd64/include/gdb_machdep.h
  head/sys/arm/include/gdb_machdep.h
  head/sys/conf/ldscript.amd64
  head/sys/gdb/gdb_packet.c
  head/sys/i386/include/gdb_machdep.h
  head/sys/mips/include/gdb_machdep.h
  head/sys/powerpc/include/gdb_machdep.h
  head/sys/sparc64/include/gdb_machdep.h

Modified: head/sys/amd64/amd64/db_interface.c
==============================================================================
--- head/sys/amd64/amd64/db_interface.c	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/db_interface.c	Tue Mar 6 14:28:37 2018	(r330539)
@@ -36,6 +36,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/kdb.h>
 #include <sys/pcpu.h>
 
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+
 #include <ddb/ddb.h>
 
 /*
@@ -62,6 +65,9 @@ db_read_bytes(vm_offset_t addr, size_t size, char *dat
 
 /*
  * Write bytes to kernel address space for debugger.
+ * We need to disable write protection temporarily so we can write
+ * things (such as break points) that might be in write-protected
+ * memory.
  */
 int
 db_write_bytes(vm_offset_t addr, size_t size, char *data)
@@ -69,15 +75,19 @@ db_write_bytes(vm_offset_t addr, size_t size, char *da
 	jmp_buf jb;
 	void *prev_jb;
 	char *dst;
+	u_long cr0save;
 	int ret;
 
+	cr0save = rcr0();
 	prev_jb = kdb_jmpbuf(jb);
 	ret = setjmp(jb);
 	if (ret == 0) {
+		load_cr0(cr0save & ~CR0_WP);
 		dst = (char *)addr;
 		while (size-- > 0)
 			*dst++ = *data++;
 	}
+	load_cr0(cr0save);
 	(void)kdb_jmpbuf(prev_jb);
 	return (ret);
 }

Modified: head/sys/amd64/amd64/gdb_machdep.c
==============================================================================
--- head/sys/amd64/amd64/gdb_machdep.c	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/gdb_machdep.c	Tue Mar 6 14:28:37 2018	(r330539)
@@ -36,11 +36,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/signal.h>
 
+#include <machine/cpufunc.h>
 #include <machine/frame.h>
 #include <machine/gdb_machdep.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
+#include <machine/specialreg.h>
 #include <machine/trap.h>
 #include <machine/frame.h>
 #include <machine/endian.h>
@@ -121,3 +123,21 @@ gdb_cpu_signal(int type, int code)
 	}
 	return (SIGEMT);
 }
+
+void *
+gdb_begin_write(void)
+{
+	u_long cr0save;
+
+	cr0save = rcr0();
+	load_cr0(cr0save & ~CR0_WP);
+	return ((void *)cr0save);
+}
+
+void
+gdb_end_write(void *arg)
+{
+
+	load_cr0((u_long)arg);
+}
+

Modified: head/sys/amd64/amd64/initcpu.c
==============================================================================
--- head/sys/amd64/amd64/initcpu.c	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/initcpu.c	Tue Mar 6 14:28:37 2018	(r330539)
@@ -218,7 +218,7 @@ initializecpu(void)
 	if (!IS_BSP() && (cpu_stdext_feature & CPUID_STDEXT_SMEP))
 		cr4 |= CR4_SMEP;
 	load_cr4(cr4);
-	if ((amd_feature & AMDID_NX) != 0) {
+	if (IS_BSP() && (amd_feature & AMDID_NX) != 0) {
 		msr = rdmsr(MSR_EFER) | EFER_NXE;
 		wrmsr(MSR_EFER, msr);
 		pg_nx = PG_NX;

Modified: head/sys/amd64/amd64/mpboot.S
==============================================================================
--- head/sys/amd64/amd64/mpboot.S	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/mpboot.S	Tue Mar 6 14:28:37 2018	(r330539)
@@ -221,15 +221,31 @@ mptramp_end:
 /*
  * From here on down is executed in the kernel .text section.
- *
- * Load a real %cr3 that has all the direct map stuff and switches
- * off the 1GB replicated mirror.  Load a stack pointer and jump
- * into AP startup code in C.
  */
 
 	.text
 	.code64
 	.p2align 4,0
 entry_64:
+	/*
+	 * If the BSP reported NXE support, enable EFER.NXE for all APs
+	 * prior to loading %cr3.  This avoids page faults if the AP
+	 * encounters memory marked with the NX bit prior to detecting and
+	 * enabling NXE support.
+	 */
+	movq	pg_nx, %rbx
+	testq	%rbx, %rbx
+	je	1f
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	orl	$EFER_NXE, %eax
+	wrmsr
+
+1:
+	/*
+	 * Load a real %cr3 that has all the direct map stuff and switches
+	 * off the 1GB replicated mirror.  Load a stack pointer and jump
+	 * into AP startup code in C.
+	 */
 	movq	KPML4phys, %rax
 	movq	%rax, %cr3
 	movq	bootSTK, %rsp

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/amd64/amd64/pmap.c	Tue Mar 6 14:28:37 2018	(r330539)
@@ -870,14 +870,64 @@ nkpt_init(vm_paddr_t addr)
 	nkpt = pt_pages;
 }
 
+/*
+ * Returns the proper write/execute permission for a physical page that is
+ * part of the initial boot allocations.
+ *
+ * If the page has kernel text, it is marked as read-only.  If the page has
If the page has + * kernel read-only data, it is marked as read-only/not-executable. If the + * page has only read-write data, it is marked as read-write/not-executable. + * If the page is below/above the kernel range, it is marked as read-write. + * + * This function operates on 2M pages, since we map the kernel space that + * way. + * + * Note that this doesn't currently provide any protection for modules. + */ +static inline pt_entry_t +bootaddr_rwx(vm_paddr_t pa) +{ + + /* + * Everything in the same 2M page as the start of the kernel + * should be static. On the other hand, things in the same 2M + * page as the end of the kernel could be read-write/executable, + * as the kernel image is not guaranteed to end on a 2M boundary. + */ + if (pa < trunc_2mpage(btext - KERNBASE) || + pa >= trunc_2mpage(_end - KERNBASE)) + return (X86_PG_RW); + /* + * The linker should ensure that the read-only and read-write + * portions don't share the same 2M page, so this shouldn't + * impact read-only data. However, in any case, any page with + * read-write data needs to be read-write. + */ + if (pa >= trunc_2mpage(brwsection - KERNBASE)) + return (X86_PG_RW | pg_nx); + /* + * Mark any 2M page containing kernel text as read-only. Mark + * other pages with read-only data as read-only and not executable. + * (It is likely a small portion of the read-only data section will + * be marked as read-only, but executable. This should be acceptable + * since the read-only protection will keep the data from changing.) + * Note that fixups to the .text section will still work until we + * set CR0.WP. + */ + if (pa < round_2mpage(etext - KERNBASE)) + return (0); + return (pg_nx); +} + static void create_pagetables(vm_paddr_t *firstaddr) { - int i, j, ndm1g, nkpdpe; + int i, j, ndm1g, nkpdpe, nkdmpde; pt_entry_t *pt_p; pd_entry_t *pd_p; pdp_entry_t *pdp_p; pml4_entry_t *p4_p; + uint64_t DMPDkernphys; /* Allocate page table pages for the direct map */ ndmpdp = howmany(ptoa(Maxmem), NBPDP); @@ -896,8 +946,20 @@ create_pagetables(vm_paddr_t *firstaddr) } DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; - if ((amd_feature & AMDID_PAGE1GB) != 0) + if ((amd_feature & AMDID_PAGE1GB) != 0) { + /* + * Calculate the number of 1G pages that will fully fit in + * Maxmem. + */ ndm1g = ptoa(Maxmem) >> PDPSHIFT; + + /* + * Allocate 2M pages for the kernel. These will be used in + * place of the first one or more 1G pages from ndm1g. 
+		 */
+		nkdmpde = howmany((vm_offset_t)(brwsection - KERNBASE), NBPDP);
+		DMPDkernphys = allocpages(firstaddr, nkdmpde);
+	}
 	if (ndm1g < ndmpdp)
 		DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g);
 	dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
@@ -923,11 +985,10 @@ create_pagetables(vm_paddr_t *firstaddr)
 	KPDphys = allocpages(firstaddr, nkpdpe);
 
 	/* Fill in the underlying page table pages */
-	/* Nominally read-only (but really R/W) from zero to physfree */
 	/* XXX not fully used, underneath 2M pages */
 	pt_p = (pt_entry_t *)KPTphys;
 	for (i = 0; ptoa(i) < *firstaddr; i++)
-		pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | pg_g;
+		pt_p[i] = ptoa(i) | X86_PG_V | pg_g | bootaddr_rwx(ptoa(i));
 
 	/* Now map the page tables at their location within PTmap */
 	pd_p = (pd_entry_t *)KPDphys;
@@ -937,8 +998,8 @@ create_pagetables(vm_paddr_t *firstaddr)
 	/* Map from zero to end of allocations under 2M pages */
 	/* This replaces some of the KPTphys entries above */
 	for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
-		pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS |
-		    pg_g;
+		pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g |
+		    bootaddr_rwx(i << PDRSHIFT);
 
 	/*
 	 * Because we map the physical blocks in 2M pages, adjust firstaddr
@@ -978,6 +1039,22 @@ create_pagetables(vm_paddr_t *firstaddr)
 	for (j = 0; i < ndmpdp; i++, j++) {
 		pdp_p[i] = DMPDphys + ptoa(j);
 		pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_U;
+	}
+
+	/*
+	 * Instead of using a 1G page for the memory containing the kernel,
+	 * use 2M pages with appropriate permissions.  (If using 1G pages,
+	 * this will partially overwrite the PDPEs above.)
+	 */
+	if (ndm1g) {
+		pd_p = (pd_entry_t *)DMPDkernphys;
+		for (i = 0; i < (NPDEPG * nkdmpde); i++)
+			pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g |
+			    X86_PG_M | X86_PG_A | pg_nx |
+			    bootaddr_rwx(i << PDRSHIFT);
+		for (i = 0; i < nkdmpde; i++)
+			pdp_p[i] = (DMPDkernphys + ptoa(i)) | X86_PG_RW |
+			    X86_PG_V | PG_U;
 	}
 
 	/* And recursively map PML4 to itself in order to get PTmap */

Modified: head/sys/amd64/include/cpu.h
==============================================================================
--- head/sys/amd64/include/cpu.h	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/amd64/include/cpu.h	Tue Mar 6 14:28:37 2018	(r330539)
@@ -68,7 +68,9 @@ struct cpu_ops {
 };
 
 extern struct cpu_ops cpu_ops;
+extern char brwsection[];
 extern char btext[];
+extern char _end[];
 extern char etext[];
 
 /* Resume hook for VMM. */
Modified: head/sys/amd64/include/gdb_machdep.h
==============================================================================
--- head/sys/amd64/include/gdb_machdep.h	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/amd64/include/gdb_machdep.h	Tue Mar 6 14:28:37 2018	(r330539)
@@ -47,8 +47,10 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+void *gdb_begin_write(void);
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);
+void gdb_end_write(void *);
 
 #endif /* !_MACHINE_GDB_MACHDEP_H_ */

Modified: head/sys/arm/include/gdb_machdep.h
==============================================================================
--- head/sys/arm/include/gdb_machdep.h	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/arm/include/gdb_machdep.h	Tue Mar 6 14:28:37 2018	(r330539)
@@ -47,6 +47,19 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);

Modified: head/sys/conf/ldscript.amd64
==============================================================================
--- head/sys/conf/ldscript.amd64	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/conf/ldscript.amd64	Tue Mar 6 14:28:37 2018	(r330539)
@@ -80,6 +80,7 @@ SECTIONS
   /* Adjust the address for the data segment.  We want to adjust up to
      the same address within the page on the next page up.  */
   . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1));
   . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));
+  PROVIDE (brwsection = .);
   /* Exception handling  */
   .eh_frame       : ONLY_IF_RW { KEEP (*(.eh_frame)) }
   .gcc_except_table   : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }

Modified: head/sys/gdb/gdb_packet.c
==============================================================================
--- head/sys/gdb/gdb_packet.c	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/gdb/gdb_packet.c	Tue Mar 6 14:28:37 2018	(r330539)
@@ -147,6 +147,7 @@ gdb_rx_mem(unsigned char *addr, size_t size)
 {
 	unsigned char *p;
 	void *prev;
+	void *wctx;
 	jmp_buf jb;
 	size_t cnt;
 	int ret;
@@ -155,6 +156,7 @@ gdb_rx_mem(unsigned char *addr, size_t size)
 	if (size * 2 != gdb_rxsz)
 		return (-1);
 
+	wctx = gdb_begin_write();
 	prev = kdb_jmpbuf(jb);
 	ret = setjmp(jb);
 	if (ret == 0) {
@@ -170,6 +172,7 @@ gdb_rx_mem(unsigned char *addr, size_t size)
 		kdb_cpu_sync_icache(addr, size);
 	}
 	(void)kdb_jmpbuf(prev);
+	gdb_end_write(wctx);
 	return ((ret == 0) ? 1 : 0);
 }
Modified: head/sys/i386/include/gdb_machdep.h
==============================================================================
--- head/sys/i386/include/gdb_machdep.h	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/i386/include/gdb_machdep.h	Tue Mar 6 14:28:37 2018	(r330539)
@@ -47,6 +47,19 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);

Modified: head/sys/mips/include/gdb_machdep.h
==============================================================================
--- head/sys/mips/include/gdb_machdep.h	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/mips/include/gdb_machdep.h	Tue Mar 6 14:28:37 2018	(r330539)
@@ -51,6 +51,19 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);

Modified: head/sys/powerpc/include/gdb_machdep.h
==============================================================================
--- head/sys/powerpc/include/gdb_machdep.h	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/powerpc/include/gdb_machdep.h	Tue Mar 6 14:28:37 2018	(r330539)
@@ -76,6 +76,19 @@ gdb_cpu_query(void)
 	return (0);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
 int gdb_cpu_signal(int, int);

Modified: head/sys/sparc64/include/gdb_machdep.h
==============================================================================
--- head/sys/sparc64/include/gdb_machdep.h	Tue Mar 6 14:18:45 2018	(r330538)
+++ head/sys/sparc64/include/gdb_machdep.h	Tue Mar 6 14:28:37 2018	(r330539)
@@ -53,6 +53,19 @@ gdb_cpu_signal(int vector, int _)
 	return (vector);
 }
 
+static __inline void *
+gdb_begin_write(void)
+{
+
+	return (NULL);
+}
+
+static __inline void
+gdb_end_write(void *arg __unused)
+{
+
+}
+
 void *gdb_cpu_getreg(int, size_t *);
 void gdb_cpu_setreg(int, void *);
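
To make the page-permission policy in the log message concrete, the following standalone C program is a minimal userland model of the decision logic that bootaddr_rwx() applies to each 2M page in the patch above. The btext, etext, brwsection, and _end linker symbols are replaced by hypothetical 2M-aligned physical offsets, and MODEL_PG_RW/MODEL_PG_NX are stand-ins for X86_PG_RW and pg_nx, so this is only a sketch of the classification, not kernel code.

/*
 * Userland model of the bootaddr_rwx() decision logic.  The boundary
 * values below are hypothetical stand-ins for (btext - KERNBASE),
 * (etext - KERNBASE), (brwsection - KERNBASE), and (_end - KERNBASE).
 */
#include <stdint.h>
#include <stdio.h>

#define	PG2M		(2ULL * 1024 * 1024)	/* 2M superpage size */
#define	TRUNC_2M(x)	((x) & ~(PG2M - 1))
#define	ROUND_2M(x)	(((x) + PG2M - 1) & ~(PG2M - 1))

/* Stand-ins for X86_PG_RW and pg_nx. */
#define	MODEL_PG_RW	0x1ULL
#define	MODEL_PG_NX	0x2ULL

/* Hypothetical kernel layout, expressed as physical offsets. */
static const uint64_t btext_pa = 0x200000;	/* start of kernel text */
static const uint64_t etext_pa = 0x800000;	/* end of kernel text */
static const uint64_t brw_pa   = 0xa00000;	/* start of read-write data */
static const uint64_t end_pa   = 0xc00000;	/* end of the loaded image */

static uint64_t
model_rwx(uint64_t pa)
{

	/* Below/above the kernel image: read-write and executable. */
	if (pa < TRUNC_2M(btext_pa) || pa >= TRUNC_2M(end_pa))
		return (MODEL_PG_RW);
	/* Read-write data: writable but not executable. */
	if (pa >= TRUNC_2M(brw_pa))
		return (MODEL_PG_RW | MODEL_PG_NX);
	/* Kernel text: read-only and executable. */
	if (pa < ROUND_2M(etext_pa))
		return (0);
	/* Read-only data: read-only and not executable. */
	return (MODEL_PG_NX);
}

int
main(void)
{
	uint64_t pa;

	for (pa = 0; pa < end_pa + PG2M; pa += PG2M)
		printf("pa 0x%08llx -> %s%s\n", (unsigned long long)pa,
		    (model_rwx(pa) & MODEL_PG_RW) ? "RW" : "RO",
		    (model_rwx(pa) & MODEL_PG_NX) ? ",NX" : ",X");
	return (0);
}

Running it prints one line per 2M page: text comes out read-only/executable, read-only data read-only/NX, read-write data RW/NX, and everything outside the image RW/executable, matching the four cases described in the bootaddr_rwx() comment.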
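
The ddb and gdb paths in the diff share one idea: save %cr0, clear CR0.WP around the write, then restore the saved value. Condensed into a single hypothetical helper (amd64 kernel context, with the debugger already having stopped the other CPUs, as in db_write_bytes()), the pattern is just a sketch like this, not an additional interface added by the commit:

/*
 * Hypothetical helper condensing the CR0.WP bracket used by
 * db_write_bytes() and gdb_begin_write()/gdb_end_write() above.
 * Kernel context only; the debugger has already stopped the other
 * CPUs, so no further synchronization is shown.
 */
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/specialreg.h>

static void
debugger_patch_bytes(char *dst, const char *src, size_t len)
{
	u_long cr0save;

	cr0save = rcr0();		/* remember the current CR0 */
	load_cr0(cr0save & ~CR0_WP);	/* let ring 0 write read-only pages */
	while (len-- > 0)
		*dst++ = *src++;
	load_cr0(cr0save);		/* restore write protection */
}

gdb_begin_write()/gdb_end_write() split the same bracket into two halves so that MI code such as gdb_rx_mem() can wrap its existing setjmp-protected copy loop without knowing anything about %cr0.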