Date: Fri, 13 Apr 2018 20:30:49 +0000 (UTC) From: Konstantin Belousov <kib@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r332489 - in head: gnu/usr.bin/gdb/kgdb sys/conf sys/dev/dcons sys/dev/hyperv/vmbus/i386 sys/dev/ppc sys/dev/syscons sys/i386/conf sys/i386/i386 sys/i386/include sys/i386/include/pc sys... Message-ID: <201804132030.w3DKUnFn050153@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib Date: Fri Apr 13 20:30:49 2018 New Revision: 332489 URL: https://svnweb.freebsd.org/changeset/base/332489 Log: i386 4/4G split. The change makes the user and kernel address spaces on i386 independent, giving each almost the full 4G of usable virtual addresses except for one PDE at the top used for the trampoline and per-CPU trampoline stacks, and system structures that must always be mapped, namely IDT, GDT, common TSS and LDT, and process-private TSS and LDT if allocated. By using 1:1 mapping for the kernel text and data, it turned out to be possible to eliminate the assembler part of locore.S which bootstraps the initial page table and KPTmap. The code is rewritten in C and moved into pmap_cold(). The comment in vmparam.h explains the KVA layout. There is no PCID mechanism available in protected mode, so each kernel/user switch back and forth completely flushes the TLB, except for the trampoline PTD region. TLB invalidation for userspace becomes trivial, because IPI handlers switch page tables. On the other hand, context switches no longer need to reload %cr3. copyout(9) was rewritten to use vm_fault_quick_hold_pages(). An issue for the new copyout(9) is compatibility with wiring user buffers around sysctl handlers. This explains the two kinds of locks for copyout ptes and the accounting of the vslock() calls. The vm_fault_quick_hold_pages() path, AKA the slow path, is only tried after the 'fast path' has failed; the fast path temporarily switches the mapping to userspace and copies the data to/from a small per-CPU buffer in the trampoline. If a page fault occurs during the copy, it is short-circuited by exception.s so that it does not even reach C code. The change was motivated by the need to implement the Meltdown mitigation, but instead of KPTI the full split is done. The i386 architecture already shows sizing problems; in particular, it is impossible to link clang and lld with debugging. 
I expect that the issues due to the virtual address space limits would only get worse, and the split gives more life to the platform. Tested by: pho Discussed with: bde Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D14633 Added: head/sys/i386/i386/copyout.c (contents, props changed) head/sys/i386/i386/copyout_fast.s - copied, changed from r332488, head/sys/i386/i386/support.s Modified: head/gnu/usr.bin/gdb/kgdb/trgt_i386.c head/sys/conf/files.i386 head/sys/conf/ldscript.i386 head/sys/dev/dcons/dcons_crom.c head/sys/dev/dcons/dcons_os.c head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S head/sys/dev/ppc/ppc.c head/sys/dev/syscons/syscons.c head/sys/i386/conf/NOTES head/sys/i386/i386/apic_vector.s head/sys/i386/i386/atpic_vector.s head/sys/i386/i386/bios.c head/sys/i386/i386/db_interface.c head/sys/i386/i386/db_trace.c head/sys/i386/i386/elf_machdep.c head/sys/i386/i386/exception.s head/sys/i386/i386/genassym.c head/sys/i386/i386/locore.s head/sys/i386/i386/machdep.c head/sys/i386/i386/mem.c head/sys/i386/i386/minidump_machdep.c head/sys/i386/i386/mp_machdep.c head/sys/i386/i386/mpboot.s head/sys/i386/i386/pmap.c head/sys/i386/i386/sigtramp.s head/sys/i386/i386/support.s head/sys/i386/i386/swtch.s head/sys/i386/i386/sys_machdep.c head/sys/i386/i386/trap.c head/sys/i386/i386/vm86.c head/sys/i386/i386/vm86bios.s head/sys/i386/i386/vm_machdep.c head/sys/i386/include/asmacros.h head/sys/i386/include/frame.h head/sys/i386/include/md_var.h head/sys/i386/include/param.h head/sys/i386/include/pc/bios.h head/sys/i386/include/pcpu.h head/sys/i386/include/pmap.h head/sys/i386/include/segments.h head/sys/i386/include/vmparam.h head/sys/kern/imgact_aout.c head/sys/kern/subr_witness.c head/sys/x86/acpica/acpi_wakeup.c head/sys/x86/x86/local_apic.c head/sys/x86/x86/mp_x86.c head/sys/x86/x86/mptable.c Modified: head/gnu/usr.bin/gdb/kgdb/trgt_i386.c 
============================================================================== --- head/gnu/usr.bin/gdb/kgdb/trgt_i386.c Fri Apr 13 19:43:23 2018 (r332488) +++ head/gnu/usr.bin/gdb/kgdb/trgt_i386.c Fri Apr 13 20:30:49 2018 (r332489) @@ -29,6 +29,8 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/proc.h> +#include <vm/vm.h> +#include <vm/pmap.h> #include <machine/pcb.h> #include <machine/frame.h> #include <machine/segments.h> @@ -279,12 +281,26 @@ kgdb_trgt_frame_cache(struct frame_info *next_frame, v char buf[MAX_REGISTER_SIZE]; struct kgdb_frame_cache *cache; char *pname; + CORE_ADDR pcx; + uintptr_t addr, setidt_disp; cache = *this_cache; if (cache == NULL) { cache = FRAME_OBSTACK_ZALLOC(struct kgdb_frame_cache); *this_cache = cache; - cache->pc = frame_func_unwind(next_frame); + pcx = frame_pc_unwind(next_frame); + if (pcx >= PMAP_TRM_MIN_ADDRESS) { + addr = kgdb_lookup("setidt_disp"); + if (addr != 0) { + if (kvm_read(kvm, addr, &setidt_disp, + sizeof(setidt_disp)) != + sizeof(setidt_disp)) + warnx("kvm_read: %s", kvm_geterr(kvm)); + else + pcx -= setidt_disp; + } + } + cache->pc = pcx; find_pc_partial_function(cache->pc, &pname, NULL, NULL); if (pname[0] != 'X') cache->frame_type = FT_NORMAL; @@ -373,6 +389,8 @@ kgdb_trgt_trapframe_sniffer(struct frame_info *next_fr CORE_ADDR pc; pc = frame_pc_unwind(next_frame); + if (pc >= PMAP_TRM_MIN_ADDRESS) + return (&kgdb_trgt_trapframe_unwind); pname = NULL; find_pc_partial_function(pc, &pname, NULL, NULL); if (pname == NULL) Modified: head/sys/conf/files.i386 ============================================================================== --- head/sys/conf/files.i386 Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/conf/files.i386 Fri Apr 13 20:30:49 2018 (r332489) @@ -483,6 +483,7 @@ i386/i386/atomic.c standard \ i386/i386/bios.c standard i386/i386/bioscall.s standard i386/i386/bpf_jit_machdep.c optional bpf_jitter +i386/i386/copyout.c standard i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c 
optional ddb i386/i386/db_trace.c optional ddb Modified: head/sys/conf/ldscript.i386 ============================================================================== --- head/sys/conf/ldscript.i386 Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/conf/ldscript.i386 Fri Apr 13 20:30:49 2018 (r332489) @@ -6,7 +6,7 @@ SEARCH_DIR(/usr/lib); SECTIONS { /* Read-only sections, merged into text segment: */ - . = kernbase + kernload + SIZEOF_HEADERS; + . = kernbase + SIZEOF_HEADERS; .interp : { *(.interp) } .hash : { *(.hash) } .gnu.hash : { *(.gnu.hash) } Modified: head/sys/dev/dcons/dcons_crom.c ============================================================================== --- head/sys/dev/dcons/dcons_crom.c Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/dev/dcons/dcons_crom.c Fri Apr 13 20:30:49 2018 (r332489) @@ -109,7 +109,11 @@ dcons_crom_expose_idt(struct dcons_crom_softc *sc) static off_t idt_paddr; /* XXX */ +#ifdef __amd64__ idt_paddr = (char *)idt - (char *)KERNBASE; +#else /* __i386__ */ + idt_paddr = (off_t)pmap_kextract((vm_offset_t)idt); +#endif crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_HI, ADDR_HI(idt_paddr)); crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_LO, ADDR_LO(idt_paddr)); Modified: head/sys/dev/dcons/dcons_os.c ============================================================================== --- head/sys/dev/dcons/dcons_os.c Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/dev/dcons/dcons_os.c Fri Apr 13 20:30:49 2018 (r332489) @@ -309,11 +309,16 @@ dcons_drv_init(int stage) * Allow read/write access to dcons buffer. 
*/ for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE) - *vtopte(KERNBASE + pa) |= PG_RW; + *vtopte(PMAP_MAP_LOW + pa) |= PG_RW; invltlb(); #endif /* XXX P to V */ +#ifdef __amd64__ dg.buf = (struct dcons_buf *)(vm_offset_t)(KERNBASE + addr); +#else /* __i386__ */ + dg.buf = (struct dcons_buf *)((vm_offset_t)PMAP_MAP_LOW + + addr); +#endif dg.size = size; if (dcons_load_buffer(dg.buf, dg.size, sc) < 0) dg.buf = NULL; Modified: head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S ============================================================================== --- head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/dev/hyperv/vmbus/i386/vmbus_vector.S Fri Apr 13 20:30:49 2018 (r332489) @@ -26,11 +26,12 @@ * $FreeBSD$ */ +#include "assym.inc" + +#include <machine/psl.h> #include <machine/asmacros.h> #include <machine/specialreg.h> -#include "assym.inc" - /* * This is the Hyper-V vmbus channel direct callback interrupt. * Only used when it is running on Hyper-V. @@ -42,6 +43,7 @@ IDTVEC(vmbus_isr) PUSH_FRAME SET_KERNEL_SREGS cld + KENTER FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp call vmbus_handle_intr Modified: head/sys/dev/ppc/ppc.c ============================================================================== --- head/sys/dev/ppc/ppc.c Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/dev/ppc/ppc.c Fri Apr 13 20:30:49 2018 (r332489) @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include <vm/vm.h> #include <vm/pmap.h> #include <machine/vmparam.h> +#include <machine/pc/bios.h> #endif #include <dev/ppbus/ppbconf.h> @@ -121,7 +122,7 @@ static char *ppc_epp_protocol[] = { " (EPP 1.9)", " (E * BIOS printer list - used by BIOS probe. 
*/ #define BIOS_PPC_PORTS 0x408 -#define BIOS_PORTS (short *)(KERNBASE+BIOS_PPC_PORTS) +#define BIOS_PORTS ((short *)BIOS_PADDRTOVADDR(BIOS_PPC_PORTS)) #define BIOS_MAX_PPC 4 #endif Modified: head/sys/dev/syscons/syscons.c ============================================================================== --- head/sys/dev/syscons/syscons.c Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/dev/syscons/syscons.c Fri Apr 13 20:30:49 2018 (r332489) @@ -288,7 +288,11 @@ ec_putc(int c) * This is enough for ec_putc() to work very early on x86 * if the kernel starts in normal color text mode. */ +#ifdef __amd64__ fb = KERNBASE + 0xb8000; +#else /* __i386__ */ + fb = PMAP_MAP_LOW + 0xb8000; +#endif xsize = 80; ysize = 25; #endif Modified: head/sys/i386/conf/NOTES ============================================================================== --- head/sys/i386/conf/NOTES Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/i386/conf/NOTES Fri Apr 13 20:30:49 2018 (r332489) @@ -895,19 +895,6 @@ options ENABLE_ALART # Control alarm on Intel intpm options PMAP_SHPGPERPROC=201 # -# Change the size of the kernel virtual address space. Due to -# constraints in loader(8) on i386, this must be a multiple of 4. -# 256 = 1 GB of kernel address space. Increasing this also causes -# a reduction of the address space in user processes. 512 splits -# the 4GB cpu address space in half (2GB user, 2GB kernel). For PAE -# kernels, the value will need to be double non-PAE. A value of 1024 -# for PAE kernels is necessary to split the address space in half. -# This will likely need to be increased to handle memory sizes >4GB. -# PAE kernels default to a value of 512. -# -options KVA_PAGES=260 - -# # Number of initial kernel page table pages used for early bootstrap. 
# This number should include enough pages to map the kernel, any # modules or other data loaded with the kernel by the loader, and data @@ -950,22 +937,6 @@ device ndis ##################################################################### # VM OPTIONS - -# Disable the 4 MByte page PSE CPU feature. The PSE feature allows the -# kernel to use 4 MByte pages to map the kernel instead of 4k pages. -# This saves on the amount of memory needed for page tables needed to -# map the kernel. You should only disable this feature as a temporary -# workaround if you are having problems with it enabled. -# -#options DISABLE_PSE - -# Disable the global pages PGE CPU feature. The PGE feature allows pages -# to be marked with the PG_G bit. TLB entries for these pages are not -# flushed from the cache when %cr3 is reloaded. This can make context -# switches less expensive. You should only disable this feature as a -# temporary workaround if you are having problems with it enabled. -# -#options DISABLE_PG_G # KSTACK_PAGES is the number of memory pages to assign to the kernel # stack of each thread. Modified: head/sys/i386/i386/apic_vector.s ============================================================================== --- head/sys/i386/i386/apic_vector.s Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/i386/i386/apic_vector.s Fri Apr 13 20:30:49 2018 (r332489) @@ -39,6 +39,7 @@ #include "opt_smp.h" #include <machine/asmacros.h> +#include <machine/psl.h> #include <machine/specialreg.h> #include <x86/apicreg.h> @@ -67,34 +68,39 @@ as_lapic_eoi: * translates that into a vector, and passes the vector to the * lapic_handle_intr() function. 
*/ -#define ISR_VEC(index, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name ## _pti) ; \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - SET_KERNEL_SREGS ; \ - cld ; \ - FAKE_MCOUNT(TF_EIP(%esp)) ; \ - cmpl $0,x2apic_mode ; \ - je 1f ; \ - movl $(MSR_APIC_ISR0 + index),%ecx ; \ - rdmsr ; \ - jmp 2f ; \ -1: ; \ - movl lapic_map, %edx ;/* pointer to local APIC */ \ - movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \ -2: ; \ - bsrl %eax, %eax ; /* index of highest set bit in ISR */ \ - jz 3f ; \ - addl $(32 * index),%eax ; \ - pushl %esp ; \ - pushl %eax ; /* pass the IRQ */ \ - call lapic_handle_intr ; \ - addl $8, %esp ; /* discard parameter */ \ -3: ; \ - MEXITCOUNT ; \ + .macro ISR_VEC index, vec_name + .text + SUPERALIGN_TEXT + .globl X\()\vec_name\()_pti, X\()\vec_name + +X\()\vec_name\()_pti: +X\()\vec_name: + PUSH_FRAME + SET_KERNEL_SREGS + cld + KENTER + FAKE_MCOUNT(TF_EIP(%esp)) + cmpl $0,x2apic_mode + je 2f + movl $(MSR_APIC_ISR0 + \index),%ecx + rdmsr + jmp 3f +2: + movl lapic_map, %edx /* pointer to local APIC */ + movl LA_ISR + 16 * \index(%edx), %eax /* load ISR */ +3: + bsrl %eax, %eax /* index of highest set bit in ISR */ + jz 4f + addl $(32 * \index),%eax + pushl %esp + pushl %eax /* pass the IRQ */ + movl $lapic_handle_intr, %eax + call *%eax + addl $8, %esp /* discard parameter */ +4: + MEXITCOUNT jmp doreti + .endm /* * Handle "spurious INTerrupts". @@ -111,13 +117,13 @@ IDTVEC(spuriousint) iret - ISR_VEC(1, apic_isr1) - ISR_VEC(2, apic_isr2) - ISR_VEC(3, apic_isr3) - ISR_VEC(4, apic_isr4) - ISR_VEC(5, apic_isr5) - ISR_VEC(6, apic_isr6) - ISR_VEC(7, apic_isr7) + ISR_VEC 1, apic_isr1 + ISR_VEC 2, apic_isr2 + ISR_VEC 3, apic_isr3 + ISR_VEC 4, apic_isr4 + ISR_VEC 5, apic_isr5 + ISR_VEC 6, apic_isr6 + ISR_VEC 7, apic_isr7 /* * Local APIC periodic timer handler. 
@@ -129,9 +135,11 @@ IDTVEC(timerint) PUSH_FRAME SET_KERNEL_SREGS cld + KENTER FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp - call lapic_handle_timer + movl $lapic_handle_timer, %eax + call *%eax add $4, %esp MEXITCOUNT jmp doreti @@ -146,8 +154,10 @@ IDTVEC(cmcint) PUSH_FRAME SET_KERNEL_SREGS cld + KENTER FAKE_MCOUNT(TF_EIP(%esp)) - call lapic_handle_cmc + movl $lapic_handle_cmc, %eax + call *%eax MEXITCOUNT jmp doreti @@ -161,8 +171,10 @@ IDTVEC(errorint) PUSH_FRAME SET_KERNEL_SREGS cld + KENTER FAKE_MCOUNT(TF_EIP(%esp)) - call lapic_handle_error + movl $lapic_handle_error, %eax + call *%eax MEXITCOUNT jmp doreti @@ -177,9 +189,11 @@ IDTVEC(xen_intr_upcall) PUSH_FRAME SET_KERNEL_SREGS cld + KENTER FAKE_MCOUNT(TF_EIP(%esp)) pushl %esp - call xen_intr_handle_upcall + movl $xen_intr_handle_upcall, %eax + call *%eax add $4, %esp MEXITCOUNT jmp doreti @@ -200,9 +214,9 @@ IDTVEC(invltlb) PUSH_FRAME SET_KERNEL_SREGS cld - - call invltlb_handler - + KENTER + movl $invltlb_handler, %eax + call *%eax jmp invltlb_ret /* @@ -214,9 +228,9 @@ IDTVEC(invlpg) PUSH_FRAME SET_KERNEL_SREGS cld - - call invlpg_handler - + KENTER + movl $invlpg_handler, %eax + call *%eax jmp invltlb_ret /* @@ -228,9 +242,9 @@ IDTVEC(invlrng) PUSH_FRAME SET_KERNEL_SREGS cld - - call invlrng_handler - + KENTER + movl $invlrng_handler, %eax + call *%eax jmp invltlb_ret /* @@ -242,9 +256,9 @@ IDTVEC(invlcache) PUSH_FRAME SET_KERNEL_SREGS cld - - call invlcache_handler - + KENTER + movl $invlcache_handler, %eax + call *%eax jmp invltlb_ret /* @@ -256,12 +270,11 @@ IDTVEC(ipi_intr_bitmap_handler) PUSH_FRAME SET_KERNEL_SREGS cld - + KENTER call as_lapic_eoi - FAKE_MCOUNT(TF_EIP(%esp)) - - call ipi_bitmap_handler + movl $ipi_bitmap_handler, %eax + call *%eax MEXITCOUNT jmp doreti @@ -274,9 +287,10 @@ IDTVEC(cpustop) PUSH_FRAME SET_KERNEL_SREGS cld - + KENTER call as_lapic_eoi - call cpustop_handler + movl $cpustop_handler, %eax + call *%eax jmp doreti /* @@ -288,9 +302,10 @@ IDTVEC(cpususpend) PUSH_FRAME 
SET_KERNEL_SREGS cld - + KENTER call as_lapic_eoi - call cpususpend_handler + movl $cpususpend_handler, %eax + call *%eax jmp doreti /* @@ -304,14 +319,14 @@ IDTVEC(rendezvous) PUSH_FRAME SET_KERNEL_SREGS cld - + KENTER #ifdef COUNT_IPIS movl PCPU(CPUID), %eax movl ipi_rendezvous_counts(,%eax,4), %eax incl (%eax) #endif - call smp_rendezvous_action - + movl $smp_rendezvous_action, %eax + call *%eax call as_lapic_eoi jmp doreti Modified: head/sys/i386/i386/atpic_vector.s ============================================================================== --- head/sys/i386/i386/atpic_vector.s Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/i386/i386/atpic_vector.s Fri Apr 13 20:30:49 2018 (r332489) @@ -36,6 +36,7 @@ * master and slave interrupt controllers. */ +#include <machine/psl.h> #include <machine/asmacros.h> #include "assym.inc" @@ -43,37 +44,41 @@ /* * Macros for interrupt entry, call to handler, and exit. */ -#define INTR(irq_num, vec_name) \ - .text ; \ - SUPERALIGN_TEXT ; \ -IDTVEC(vec_name ##_pti) ; \ -IDTVEC(vec_name) ; \ - PUSH_FRAME ; \ - SET_KERNEL_SREGS ; \ - cld ; \ -; \ - FAKE_MCOUNT(TF_EIP(%esp)) ; \ - pushl %esp ; \ - pushl $irq_num; /* pass the IRQ */ \ - call atpic_handle_intr ; \ - addl $8, %esp ; /* discard the parameters */ \ -; \ - MEXITCOUNT ; \ + .macro INTR irq_num, vec_name + .text + SUPERALIGN_TEXT + .globl X\()\vec_name\()_pti, X\()\vec_name + +X\()\vec_name\()_pti: +X\()\vec_name: + PUSH_FRAME + SET_KERNEL_SREGS + cld + KENTER + FAKE_MCOUNT(TF_EIP(%esp)) + pushl %esp + pushl $\irq_num /* pass the IRQ */ + movl $atpic_handle_intr, %eax + call *%eax + addl $8, %esp /* discard the parameters */ + + MEXITCOUNT jmp doreti + .endm - INTR(0, atpic_intr0) - INTR(1, atpic_intr1) - INTR(2, atpic_intr2) - INTR(3, atpic_intr3) - INTR(4, atpic_intr4) - INTR(5, atpic_intr5) - INTR(6, atpic_intr6) - INTR(7, atpic_intr7) - INTR(8, atpic_intr8) - INTR(9, atpic_intr9) - INTR(10, atpic_intr10) - INTR(11, atpic_intr11) - INTR(12, atpic_intr12) - INTR(13, 
atpic_intr13) - INTR(14, atpic_intr14) - INTR(15, atpic_intr15) + INTR 0, atpic_intr0 + INTR 1, atpic_intr1 + INTR 2, atpic_intr2 + INTR 3, atpic_intr3 + INTR 4, atpic_intr4 + INTR 5, atpic_intr5 + INTR 6, atpic_intr6 + INTR 7, atpic_intr7 + INTR 8, atpic_intr8 + INTR 9, atpic_intr9 + INTR 10, atpic_intr10 + INTR 11, atpic_intr11 + INTR 12, atpic_intr12 + INTR 13, atpic_intr13 + INTR 14, atpic_intr14 + INTR 15, atpic_intr15 Modified: head/sys/i386/i386/bios.c ============================================================================== --- head/sys/i386/i386/bios.c Fri Apr 13 19:43:23 2018 (r332488) +++ head/sys/i386/i386/bios.c Fri Apr 13 20:30:49 2018 (r332489) @@ -305,6 +305,7 @@ set_bios_selectors(struct bios_segments *seg, int flag } extern int vm86pa; +extern u_long vm86phystk; extern void bios16_jmp(void); /* @@ -329,7 +330,7 @@ bios16(struct bios_args *args, char *fmt, ...) int flags = BIOSCODE_FLAG | BIOSDATA_FLAG; u_int i, arg_start, arg_end; pt_entry_t *pte; - pd_entry_t *ptd; + pd_entry_t *ptd, orig_ptd; arg_start = 0xffffffff; arg_end = 0; @@ -390,27 +391,14 @@ bios16(struct bios_args *args, char *fmt, ...) args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME; args->seg.code32.limit = 0xffff; - ptd = (pd_entry_t *)rcr3(); -#if defined(PAE) || defined(PAE_TABLES) - if (ptd == IdlePDPT) -#else - if (ptd == IdlePTD) -#endif - { - /* - * no page table, so create one and install it. - */ - pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE); - *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V; - *ptd = vtophys(pte) | PG_RW | PG_V; - } else { - /* - * this is a user-level page table - */ - pte = PTmap; - *pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V; - } + /* + * no page table, so create one and install it. 
+ */ + pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK); + ptd = IdlePTD; + *pte = vm86phystk | PG_RW | PG_V; + orig_ptd = *ptd; + *ptd = vtophys(pte) | PG_RW | PG_V; pmap_invalidate_all(kernel_pmap); /* XXX insurance for now */ stack_top = stack; @@ -464,20 +452,12 @@ bios16(struct bios_args *args, char *fmt, ...) i = bios16_call(&args->r, stack_top); - if (pte == PTmap) { - *pte = 0; /* remove entry */ - /* - * XXX only needs to be invlpg(0) but that doesn't work on the 386 - */ - pmap_invalidate_all(kernel_pmap); - } else { - *ptd = 0; /* remove page table */ - /* - * XXX only needs to be invlpg(0) but that doesn't work on the 386 - */ - pmap_invalidate_all(kernel_pmap); - free(pte, M_TEMP); /* ... and free it */ - } + *ptd = orig_ptd; /* remove page table */ + /* + * XXX only needs to be invlpg(0) but that doesn't work on the 386 + */ + pmap_invalidate_all(kernel_pmap); + free(pte, M_TEMP); /* ... and free it */ return (i); } Added: head/sys/i386/i386/copyout.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/i386/i386/copyout.c Fri Apr 13 20:30:49 2018 (r332489) @@ -0,0 +1,489 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov <kib@FreeBSD.org> + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/pcpu.h> +#include <sys/proc.h> +#include <sys/sched.h> +#include <sys/sysctl.h> +#include <sys/systm.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_extern.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_page.h> + +#if defined(PAE) || defined(PAE_TABLES) +#define KCR3 ((u_int)IdlePDPT) +#else +#define KCR3 ((u_int)IdlePTD) +#endif + +int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int); +static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int); +int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int); +static int (*copyout_fast_tramp)(const void *, void *, size_t, u_int); +int fubyte_fast(volatile const void *base, u_int kcr3); +static int (*fubyte_fast_tramp)(volatile const void *, u_int); +int fuword16_fast(volatile const void *base, u_int kcr3); +static int (*fuword16_fast_tramp)(volatile const void *, u_int); +int fueword_fast(volatile const void *base, long *val, u_int kcr3); +static int (*fueword_fast_tramp)(volatile const void *, long *, u_int); +int subyte_fast(volatile 
void *base, int val, u_int kcr3); +static int (*subyte_fast_tramp)(volatile void *, int, u_int); +int suword16_fast(volatile void *base, int val, u_int kcr3); +static int (*suword16_fast_tramp)(volatile void *, int, u_int); +int suword_fast(volatile void *base, long val, u_int kcr3); +static int (*suword_fast_tramp)(volatile void *, long, u_int); + +static int fast_copyout = 1; +SYSCTL_INT(_machdep, OID_AUTO, fast_copyout, CTLFLAG_RWTUN, + &fast_copyout, 0, + ""); + +void +copyout_init_tramp(void) +{ + + copyin_fast_tramp = (int (*)(const void *, void *, size_t, u_int))( + (uintptr_t)copyin_fast + setidt_disp); + copyout_fast_tramp = (int (*)(const void *, void *, size_t, u_int))( + (uintptr_t)copyout_fast + setidt_disp); + fubyte_fast_tramp = (int (*)(volatile const void *, u_int))( + (uintptr_t)fubyte_fast + setidt_disp); + fuword16_fast_tramp = (int (*)(volatile const void *, u_int))( + (uintptr_t)fuword16_fast + setidt_disp); + fueword_fast_tramp = (int (*)(volatile const void *, long *, u_int))( + (uintptr_t)fueword_fast + setidt_disp); + subyte_fast_tramp = (int (*)(volatile void *, int, u_int))( + (uintptr_t)subyte_fast + setidt_disp); + suword16_fast_tramp = (int (*)(volatile void *, int, u_int))( + (uintptr_t)suword16_fast + setidt_disp); + suword_fast_tramp = (int (*)(volatile void *, long, u_int))( + (uintptr_t)suword_fast + setidt_disp); +} + +static int +cp_slow0(vm_offset_t uva, size_t len, bool write, + void (*f)(vm_offset_t, void *), void *arg) +{ + struct pcpu *pc; + vm_page_t m[2]; + pt_entry_t *pte; + vm_offset_t kaddr; + int error, i, plen; + bool sleepable; + + plen = howmany(uva - trunc_page(uva) + len, PAGE_SIZE); + MPASS(plen <= nitems(m)); + error = 0; + i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, uva, len, + (write ? 
VM_PROT_WRITE : VM_PROT_READ) | VM_PROT_QUICK_NOFAULT, + m, nitems(m)); + if (i != plen) + return (EFAULT); + sched_pin(); + pc = get_pcpu(); + if (!THREAD_CAN_SLEEP() || curthread->td_vslock_sz > 0 || + (curthread->td_pflags & TDP_NOFAULTING) != 0) { + sleepable = false; + mtx_lock(&pc->pc_copyout_mlock); + kaddr = pc->pc_copyout_maddr; + } else { + sleepable = true; + sx_xlock(&pc->pc_copyout_slock); + kaddr = pc->pc_copyout_saddr; + } + for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) { + *pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(m[i]) | + pmap_cache_bits(pmap_page_get_memattr(m[i]), FALSE); + invlpg(kaddr + ptoa(i)); + } + kaddr += uva - trunc_page(uva); + f(kaddr, arg); + sched_unpin(); + if (sleepable) + sx_xunlock(&pc->pc_copyout_slock); + else + mtx_unlock(&pc->pc_copyout_mlock); + for (i = 0; i < plen; i++) { + vm_page_lock(m[i]); + vm_page_unhold(m[i]); + vm_page_unlock(m[i]); + } + return (error); +} + +struct copyinstr_arg0 { + vm_offset_t kc; + size_t len; + size_t alen; + bool end; +}; + +static void +copyinstr_slow0(vm_offset_t kva, void *arg) +{ + struct copyinstr_arg0 *ca; + char c; + + ca = arg; + MPASS(ca->alen == 0 && ca->len > 0 && !ca->end); + while (ca->alen < ca->len && !ca->end) { + c = *(char *)(kva + ca->alen); + *(char *)ca->kc = c; + ca->alen++; + ca->kc++; + if (c == '\0') + ca->end = true; + } +} + +int +copyinstr(const void *udaddr, void *kaddr, size_t maxlen, size_t *lencopied) +{ + struct copyinstr_arg0 ca; + vm_offset_t uc; + size_t plen; + int error; + + error = 0; + ca.end = false; + for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; + plen < maxlen && !ca.end; uc += ca.alen, plen += ca.alen) { + ca.len = round_page(uc) - uc; + if (ca.len == 0) + ca.len = PAGE_SIZE; + if (plen + ca.len > maxlen) + ca.len = maxlen - plen; + ca.alen = 0; + if (cp_slow0(uc, ca.len, false, copyinstr_slow0, &ca) != 0) { + error = EFAULT; + break; + } + } + if (!ca.end && plen == maxlen && error == 0) + error = 
ENAMETOOLONG; + if (lencopied != NULL) + *lencopied = plen; + return (error); +} + +struct copyin_arg0 { + vm_offset_t kc; + size_t len; +}; + +static void +copyin_slow0(vm_offset_t kva, void *arg) +{ + struct copyin_arg0 *ca; + + ca = arg; + bcopy((void *)kva, (void *)ca->kc, ca->len); +} + +int +copyin(const void *udaddr, void *kaddr, size_t len) +{ + struct copyin_arg0 ca; + vm_offset_t uc; + size_t plen; + + if ((uintptr_t)udaddr + len < (uintptr_t)udaddr || + (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS) + return (-1); + if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ && + copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0)) + return (0); + for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; + plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) { + ca.len = round_page(uc) - uc; + if (ca.len == 0) + ca.len = PAGE_SIZE; + if (plen + ca.len > len) + ca.len = len - plen; + if (cp_slow0(uc, ca.len, false, copyin_slow0, &ca) != 0) + return (EFAULT); + } + return (0); +} + +static void +copyout_slow0(vm_offset_t kva, void *arg) +{ + struct copyin_arg0 *ca; + + ca = arg; + bcopy((void *)ca->kc, (void *)kva, ca->len); +} + +int +copyout(const void *kaddr, void *udaddr, size_t len) +{ + struct copyin_arg0 ca; + vm_offset_t uc; + size_t plen; + + if ((uintptr_t)udaddr + len < (uintptr_t)udaddr || + (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS) + return (-1); + if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ && + copyout_fast_tramp(kaddr, udaddr, len, KCR3) == 0)) + return (0); + for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr; + plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) { + ca.len = round_page(uc) - uc; + if (ca.len == 0) + ca.len = PAGE_SIZE; + if (plen + ca.len > len) + ca.len = len - plen; + if (cp_slow0(uc, ca.len, true, copyout_slow0, &ca) != 0) + return (EFAULT); + } + return (0); +} + +/* + * Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user + * memory. 
+ */ + +static void +fubyte_slow0(vm_offset_t kva, void *arg) +{ + + *(int *)arg = *(u_char *)kva; +} + +int +fubyte(volatile const void *base) +{ + int res; + + if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base || + (uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS) + return (-1); + if (fast_copyout) { + res = fubyte_fast_tramp(base, KCR3); + if (res != -1) + return (res); + } + if (cp_slow0((vm_offset_t)base, sizeof(char), false, fubyte_slow0, + &res) != 0) + return (-1); + return (res); +} + +static void +fuword16_slow0(vm_offset_t kva, void *arg) +{ + + *(int *)arg = *(uint16_t *)kva; +} + +int +fuword16(volatile const void *base) +{ + int res; + + if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base || + (uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS) + return (-1); + if (fast_copyout) { + res = fuword16_fast_tramp(base, KCR3); + if (res != -1) + return (res); + } + if (cp_slow0((vm_offset_t)base, sizeof(uint16_t), false, + fuword16_slow0, &res) != 0) + return (-1); + return (res); +} + +static void +fueword_slow0(vm_offset_t kva, void *arg) +{ + + *(uint32_t *)arg = *(uint32_t *)kva; +} + +int +fueword(volatile const void *base, long *val) +{ + uint32_t res; + + if ((uintptr_t)base + sizeof(*val) < (uintptr_t)base || + (uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS) + return (-1); + if (fast_copyout) { + if (fueword_fast_tramp(base, val, KCR3) == 0) + return (0); + } + if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0, + &res) != 0) + return (-1); + *val = res; + return (0); +} + +int +fueword32(volatile const void *base, int32_t *val) *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201804132030.w3DKUnFn050153>