Date: Wed, 15 Jun 2022 15:39:26 GMT From: Mark Johnston <markj@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 756bc3adc578 - main - kasan: Create a shadow for the bootstack prior to hammer_time() Message-ID: <202206151539.25FFdQdZ085416@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=756bc3adc578077d530e7f64683d4fc8383030ce commit 756bc3adc578077d530e7f64683d4fc8383030ce Author: Mark Johnston <markj@FreeBSD.org> AuthorDate: 2022-06-15 14:48:16 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2022-06-15 15:39:10 +0000 kasan: Create a shadow for the bootstack prior to hammer_time() When the kernel is compiled with -asan-stack=true, the address sanitizer will emit inline accesses to the shadow map. In other words, some shadow map accesses are not intercepted by the KASAN runtime, so they cannot be disabled even if the runtime is not yet initialized by kasan_init() at the end of hammer_time(). This went unnoticed because the loader will initialize all PML4 entries of the bootstrap page table to point to the same PDP page, so early shadow map accesses do not raise a page fault, though they are silently corrupting memory. In fact, when the loader does not copy the staging area, we do get a page fault since in that case only the first and last PML4Es are populated by the loader. But due to another bug, the loader always treated KASAN kernels as non-relocatable and thus always copied the staging area. It is not really practical to annotate hammer_time() and all callees with __nosanitizeaddress, so instead add some early initialization which creates a shadow for the boot stack used by hammer_time(). This is only needed by KASAN, not by KMSAN, but the shared pmap code handles both. 
Reported by: mhorne Reviewed by: kib MFC after: 1 month Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D35449 --- sys/amd64/amd64/locore.S | 22 ++++++--- sys/amd64/amd64/machdep.c | 64 +++++++++++++------------- sys/amd64/amd64/pmap.c | 111 ++++++++++++++++++++++++++++++++++++++++++++- sys/amd64/include/asan.h | 6 +++ sys/amd64/include/md_var.h | 1 + sys/amd64/include/pmap.h | 1 + sys/kern/subr_asan.c | 6 +++ sys/sys/asan.h | 3 +- 8 files changed, 174 insertions(+), 40 deletions(-) diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S index f2eedb402ef3..f034a25c9b1b 100644 --- a/sys/amd64/amd64/locore.S +++ b/sys/amd64/amd64/locore.S @@ -48,6 +48,8 @@ .set dmapbase,DMAP_MIN_ADDRESS .set dmapend,DMAP_MAX_ADDRESS +#define BOOTSTACK_SIZE 4096 + .text /********************************************************************** * @@ -66,14 +68,22 @@ ENTRY(btext) pushq $PSL_KERNEL popfq - /* Find the metadata pointers before we lose them */ + /* Get onto a stack that we can trust - there is no going back now. */ movq %rsp, %rbp + movq $bootstack,%rsp + +#ifdef KASAN + /* Bootstrap a shadow map for the boot stack. */ + movq $bootstack, %rdi + subq $BOOTSTACK_SIZE, %rdi + movq $BOOTSTACK_SIZE, %rsi + call kasan_init_early +#endif + + /* Grab metadata pointers from the loader. */ movl 4(%rbp),%edi /* modulep (arg 1) */ movl 8(%rbp),%esi /* kernend (arg 2) */ - - /* Get onto a stack that we can trust - there is no going back now. 
*/ - movq $bootstack,%rsp - xorl %ebp, %ebp + xorq %rbp, %rbp call hammer_time /* set up cpu for unix operation */ movq %rax,%rsp /* set up kstack for mi_startup() */ @@ -140,5 +150,5 @@ ENTRY(la57_trampoline_end) .bss ALIGN_DATA /* just to be sure */ .globl bootstack - .space 0x1000 /* space for bootstack - temporary stack */ + .space BOOTSTACK_SIZE /* space for bootstack - temporary stack */ bootstack: diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 55a278de6020..9979592acc19 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1260,16 +1260,43 @@ amd64_bsp_ist_init(struct pcpu *pc) tssp->tss_ist4 = (long)np; } +/* + * Calculate the kernel load address by inspecting page table created by loader. + * The assumptions: + * - kernel is mapped at KERNBASE, backed by contiguous phys memory + * aligned at 2M, below 4G (the latter is important for AP startup) + * - there is a 2M hole at KERNBASE (KERNSTART = KERNBASE + 2M) + * - kernel is mapped with 2M superpages + * - all participating memory, i.e. 
kernel, modules, metadata, + * page table is accessible by pre-created 1:1 mapping + * (right now loader creates 1:1 mapping for lower 4G, and all + * memory is from there) + * - there is a usable memory block right after the end of the + * mapped kernel and all modules/metadata, pointed to by + * physfree, for early allocations + */ +vm_paddr_t __nosanitizeaddress __nosanitizememory +amd64_loadaddr(void) +{ + pml4_entry_t *pml4e; + pdp_entry_t *pdpe; + pd_entry_t *pde; + uint64_t cr3; + + cr3 = rcr3(); + pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(KERNSTART); + pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(KERNSTART); + pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(KERNSTART); + return (*pde & PG_FRAME); +} + u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; - uint64_t cr3, rsp0; - pml4_entry_t *pml4e; - pdp_entry_t *pdpe; - pd_entry_t *pde; + uint64_t rsp0; char *env; struct user_segment_descriptor *gdt; struct region_descriptor r_gdt; @@ -1278,34 +1305,9 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) TSRAW(&thread0, TS_ENTER, __func__, NULL); - /* - * Calculate kernphys by inspecting page table created by loader. - * The assumptions: - * - kernel is mapped at KERNBASE, backed by contiguous phys memory - * aligned at 2M, below 4G (the latter is important for AP startup) - * - there is a 2M hole at KERNBASE - * - kernel is mapped with 2M superpages - * - all participating memory, i.e. 
kernel, modules, metadata, - * page table is accessible by pre-created 1:1 mapping - * (right now loader creates 1:1 mapping for lower 4G, and all - * memory is from there) - * - there is a usable memory block right after the end of the - * mapped kernel and all modules/metadata, pointed to by - * physfree, for early allocations - */ - cr3 = rcr3(); - pml4e = (pml4_entry_t *)(cr3 & ~PAGE_MASK) + pmap_pml4e_index( - (vm_offset_t)hammer_time); - pdpe = (pdp_entry_t *)(*pml4e & ~PAGE_MASK) + pmap_pdpe_index( - (vm_offset_t)hammer_time); - pde = (pd_entry_t *)(*pdpe & ~PAGE_MASK) + pmap_pde_index( - (vm_offset_t)hammer_time); - kernphys = (vm_paddr_t)(*pde & ~PDRMASK) - - (vm_paddr_t)(((vm_offset_t)hammer_time - KERNBASE) & ~PDRMASK); - - /* Fix-up for 2M hole */ + kernphys = amd64_loadaddr(); + physfree += kernphys; - kernphys += NBPDR; kmdp = init_ops.parse_preload_data(modulep); diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index a4e796becc73..f35a8c4c789c 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -11429,6 +11429,107 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) } #if defined(KASAN) || defined(KMSAN) + +/* + * Reserve enough memory to: + * 1) allocate PDP pages for the shadow map(s), + * 2) shadow one page of memory, so one PD page, one PT page, and one shadow + * page per shadow map. 
+ */ +#ifdef KASAN +#define SAN_EARLY_PAGES (NKASANPML4E + 3) +#else +#define SAN_EARLY_PAGES (NKMSANSHADPML4E + NKMSANORIGPML4E + 2 * 3) +#endif + +static uint64_t __nosanitizeaddress __nosanitizememory +pmap_san_enter_early_alloc_4k(uint64_t pabase) +{ + static uint8_t data[PAGE_SIZE * SAN_EARLY_PAGES] __aligned(PAGE_SIZE); + static size_t offset = 0; + uint64_t pa; + + if (offset == sizeof(data)) { + panic("%s: ran out of memory for the bootstrap shadow map", + __func__); + } + + pa = pabase + ((vm_offset_t)&data[offset] - KERNSTART); + offset += PAGE_SIZE; + return (pa); +} + +/* + * Map a shadow page, before the kernel has bootstrapped its page tables. This + * is currently only used to shadow the temporary boot stack set up by locore. + */ +static void __nosanitizeaddress __nosanitizememory +pmap_san_enter_early(vm_offset_t va) +{ + static bool first = true; + pml4_entry_t *pml4e; + pdp_entry_t *pdpe; + pd_entry_t *pde; + pt_entry_t *pte; + uint64_t cr3, pa, base; + int i; + + base = amd64_loadaddr(); + cr3 = rcr3(); + + if (first) { + /* + * If this is the first call, we need to allocate new PML4Es for + * the bootstrap shadow map(s). We don't know how the PML4 page + * was initialized by the boot loader, so we can't simply test + * whether the shadow map's PML4Es are zero. 
+ */ + first = false; +#ifdef KASAN + for (i = 0; i < NKASANPML4E; i++) { + pa = pmap_san_enter_early_alloc_4k(base); + + pml4e = (pml4_entry_t *)cr3 + + pmap_pml4e_index(KASAN_MIN_ADDRESS + i * NBPML4); + *pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V); + } +#else + for (i = 0; i < NKMSANORIGPML4E; i++) { + pa = pmap_san_enter_early_alloc_4k(base); + + pml4e = (pml4_entry_t *)cr3 + + pmap_pml4e_index(KMSAN_ORIG_MIN_ADDRESS + + i * NBPML4); + *pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V); + } + for (i = 0; i < NKMSANSHADPML4E; i++) { + pa = pmap_san_enter_early_alloc_4k(base); + + pml4e = (pml4_entry_t *)cr3 + + pmap_pml4e_index(KMSAN_SHAD_MIN_ADDRESS + + i * NBPML4); + *pml4e = (pml4_entry_t)(pa | X86_PG_RW | X86_PG_V); + } +#endif + } + pml4e = (pml4_entry_t *)cr3 + pmap_pml4e_index(va); + pdpe = (pdp_entry_t *)(*pml4e & PG_FRAME) + pmap_pdpe_index(va); + if (*pdpe == 0) { + pa = pmap_san_enter_early_alloc_4k(base); + *pdpe = (pdp_entry_t)(pa | X86_PG_RW | X86_PG_V); + } + pde = (pd_entry_t *)(*pdpe & PG_FRAME) + pmap_pde_index(va); + if (*pde == 0) { + pa = pmap_san_enter_early_alloc_4k(base); + *pde = (pd_entry_t)(pa | X86_PG_RW | X86_PG_V); + } + pte = (pt_entry_t *)(*pde & PG_FRAME) + pmap_pte_index(va); + if (*pte != 0) + panic("%s: PTE for %#lx is already initialized", __func__, va); + pa = pmap_san_enter_early_alloc_4k(base); + *pte = (pt_entry_t)(pa | X86_PG_A | X86_PG_M | X86_PG_RW | X86_PG_V); +} + static vm_page_t pmap_san_enter_alloc_4k(void) { @@ -11452,7 +11553,7 @@ pmap_san_enter_alloc_2m(void) * Grow a shadow map by at least one 4KB page at the specified address. Use 2MB * pages when possible. */ -void +void __nosanitizeaddress __nosanitizememory pmap_san_enter(vm_offset_t va) { pdp_entry_t *pdpe; @@ -11460,6 +11561,14 @@ pmap_san_enter(vm_offset_t va) pt_entry_t *pte; vm_page_t m; + if (kernphys == 0) { + /* + * We're creating a temporary shadow map for the boot stack. 
+ */ + pmap_san_enter_early(va); + return; + } + mtx_assert(&kernel_map->system_mtx, MA_OWNED); pdpe = pmap_pdpe(kernel_pmap, va); diff --git a/sys/amd64/include/asan.h b/sys/amd64/include/asan.h index 03d57673d05e..a27fbbcb30c7 100644 --- a/sys/amd64/include/asan.h +++ b/sys/amd64/include/asan.h @@ -66,6 +66,12 @@ kasan_md_init(void) { } +static inline void +kasan_md_init_early(vm_offset_t bootstack, size_t size) +{ + kasan_shadow_map(bootstack, size); +} + #endif /* KASAN */ #endif /* !_MACHINE_ASAN_H_ */ diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h index 196836b5baea..f014c66c0d06 100644 --- a/sys/amd64/include/md_var.h +++ b/sys/amd64/include/md_var.h @@ -64,6 +64,7 @@ struct sysentvec; void amd64_conf_fast_syscall(void); void amd64_db_resume_dbreg(void); +vm_paddr_t amd64_loadaddr(void); void amd64_lower_shared_page(struct sysentvec *); void amd64_bsp_pcpu_init1(struct pcpu *pc); void amd64_bsp_pcpu_init2(uint64_t rsp0); diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index 9aacae73ebd4..3d51803d82b7 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -533,6 +533,7 @@ void pmap_page_array_startup(long count); vm_page_t pmap_page_alloc_below_4g(bool zeroed); #if defined(KASAN) || defined(KMSAN) +void pmap_san_bootstrap(void); void pmap_san_enter(vm_offset_t); #endif diff --git a/sys/kern/subr_asan.c b/sys/kern/subr_asan.c index 003b89f888e9..11f7996cfe73 100644 --- a/sys/kern/subr_asan.c +++ b/sys/kern/subr_asan.c @@ -139,6 +139,12 @@ kasan_init(void) kasan_enabled = true; } +void +kasan_init_early(vm_offset_t stack, size_t size) +{ + kasan_md_init_early(stack, size); +} + static inline const char * kasan_code_name(uint8_t code) { diff --git a/sys/sys/asan.h b/sys/sys/asan.h index caa6643bda68..0a9d94007bec 100644 --- a/sys/sys/asan.h +++ b/sys/sys/asan.h @@ -56,11 +56,10 @@ #define KASAN_EXEC_ARGS_FREED 0xFF void kasan_init(void); +void kasan_init_early(vm_offset_t, size_t); void 
kasan_shadow_map(vm_offset_t, size_t); - void kasan_mark(const void *, size_t, size_t, uint8_t); #else /* KASAN */ -#define kasan_early_init(u) #define kasan_init() #define kasan_shadow_map(a, s) #define kasan_mark(p, s, l, c)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202206151539.25FFdQdZ085416>