From owner-svn-src-head@FreeBSD.ORG Wed Apr 1 13:09:27 2009 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id F37B4106573D; Wed, 1 Apr 2009 13:09:26 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id DF1E48FC0A; Wed, 1 Apr 2009 13:09:26 +0000 (UTC) (envelope-from kib@FreeBSD.org) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n31D9QmG032652; Wed, 1 Apr 2009 13:09:26 GMT (envelope-from kib@svn.freebsd.org) Received: (from kib@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n31D9QqM032643; Wed, 1 Apr 2009 13:09:26 GMT (envelope-from kib@svn.freebsd.org) Message-Id: <200904011309.n31D9QqM032643@svn.freebsd.org> From: Konstantin Belousov Date: Wed, 1 Apr 2009 13:09:26 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r190620 - in head/sys: amd64/acpica amd64/amd64 amd64/ia32 amd64/include amd64/linux32 conf X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 01 Apr 2009 13:09:27 -0000 Author: kib Date: Wed Apr 1 13:09:26 2009 New Revision: 190620 URL: http://svn.freebsd.org/changeset/base/190620 Log: Save and restore segment registers on amd64 when entering and leaving the kernel on amd64. Fill and read segment registers for mcontext and signals. Handle traps caused by restoration of the invalidated selectors. Implement user-mode creation and manipulation of the process-specific LDT descriptors for amd64, see sysarch(2). Implement support for TSS i/o port access permission bitmap for amd64. Context-switch LDT and TSS. Do not save and restore segment registers on the context switch, that is handled by kernel enter/leave trampolines now. Remove segment restore code from the signal trampolines for freebsd/amd64, freebsd/ia32 and linux/i386 for the same reason. Implement amd64-specific compat shims for sysarch. Linuxolator (temporary ?) switched to use gsbase for thread_area pointer. TODO: Currently, gdb is not adapted to show segment registers from struct reg. Also, no machine-depended ptrace command is added to set segment registers for debugged process. In collaboration with: pho Discussed with: peter Reviewed by: jhb Linuxolator tested by: dchagin Added: head/sys/amd64/ia32/ia32_misc.c (contents, props changed) Modified: head/sys/amd64/acpica/acpi_switch.S head/sys/amd64/amd64/apic_vector.S head/sys/amd64/amd64/cpu_switch.S head/sys/amd64/amd64/db_interface.c head/sys/amd64/amd64/db_trace.c head/sys/amd64/amd64/exception.S head/sys/amd64/amd64/genassym.c head/sys/amd64/amd64/machdep.c head/sys/amd64/amd64/mp_machdep.c head/sys/amd64/amd64/sys_machdep.c head/sys/amd64/amd64/trap.c head/sys/amd64/amd64/vm_machdep.c head/sys/amd64/ia32/ia32_exception.S head/sys/amd64/ia32/ia32_reg.c head/sys/amd64/ia32/ia32_signal.c head/sys/amd64/ia32/ia32_sigtramp.S head/sys/amd64/include/asmacros.h head/sys/amd64/include/frame.h head/sys/amd64/include/md_var.h head/sys/amd64/include/pcb.h head/sys/amd64/include/pcpu.h head/sys/amd64/include/proc.h head/sys/amd64/include/segments.h head/sys/amd64/include/sysarch.h head/sys/amd64/linux32/linux32_locore.s head/sys/amd64/linux32/linux32_machdep.c head/sys/amd64/linux32/linux32_sysvec.c head/sys/conf/files.amd64 Modified: head/sys/amd64/acpica/acpi_switch.S ============================================================================== --- head/sys/amd64/acpica/acpi_switch.S Wed Apr 1 12:53:01 2009 (r190619) +++ head/sys/amd64/acpica/acpi_switch.S Wed Apr 1 13:09:26 2009 (r190620) @@ -64,12 +64,17 @@ ENTRY(acpi_restorecpu) /* Fetch PCB. */ movq WAKEUP_CTX(xpcb), %r11 - /* Restore segment registers. */ - mov WAKEUP_PCB(DS), %ds - mov WAKEUP_PCB(ES), %es - mov WAKEUP_XPCB(SS), %ss - mov WAKEUP_PCB(FS), %fs - mov WAKEUP_PCB(GS), %gs + /* Force kernel segment registers. */ + movl $KDSEL, %eax + movw %ax, %ds + movl $KDSEL, %eax + movw %ax, %es + movl $KDSEL, %eax + movw %ax, %ss + movl $KUF32SEL, %eax + movw %ax, %fs + movl $KUG32SEL, %eax + movw %ax, %gs movl $MSR_FSBASE, %ecx movl WAKEUP_PCB(FSBASE), %eax Modified: head/sys/amd64/amd64/apic_vector.S ============================================================================== --- head/sys/amd64/amd64/apic_vector.S Wed Apr 1 12:53:01 2009 (r190619) +++ head/sys/amd64/amd64/apic_vector.S Wed Apr 1 13:09:26 2009 (r190620) @@ -219,9 +219,7 @@ IDTVEC(cpustop) movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ call cpustop_handler - - POP_FRAME - iretq + jmp doreti /* * Executed by a CPU when it receives an IPI_SUSPEND from another CPU. @@ -251,6 +249,5 @@ IDTVEC(rendezvous) call smp_rendezvous_action movq lapic, %rax movl $0, LA_EOI(%rax) /* End Of Interrupt to APIC */ - POP_FRAME /* Why not doreti? */ - iretq + jmp doreti #endif /* SMP */ Modified: head/sys/amd64/amd64/cpu_switch.S ============================================================================== --- head/sys/amd64/amd64/cpu_switch.S Wed Apr 1 12:53:01 2009 (r190619) +++ head/sys/amd64/amd64/cpu_switch.S Wed Apr 1 13:09:26 2009 (r190620) @@ -75,8 +75,6 @@ ENTRY(cpu_throw) 1: movq TD_PCB(%rdi),%r8 /* Old pcb */ movl PCPU(CPUID), %eax - movq PCB_FSBASE(%r8),%r9 - movq PCB_GSBASE(%r8),%r10 /* release bit from old pm_active */ movq TD_PROC(%rdi), %rdx /* oldtd->td_proc */ movq P_VMSPACE(%rdx), %rdx /* proc->p_vmspace */ @@ -110,28 +108,6 @@ ENTRY(cpu_switch) movq %rbx,PCB_RBX(%r8) movq %rax,PCB_RIP(%r8) - /* - * Reread fs and gs bases. Explicit fs segment register load - * by the usermode code may change actual fs base without - * updating pcb_{fs,gs}base. - * - * %rdx still contains the mtx, save %rdx around rdmsr. - */ - movq %rdx,%r11 - movl $MSR_FSBASE,%ecx - rdmsr - shlq $32,%rdx - leaq (%rax,%rdx),%r9 - movl $MSR_KGSBASE,%ecx - rdmsr - shlq $32,%rdx - leaq (%rax,%rdx),%r10 - movq %r11,%rdx - - testl $PCB_32BIT,PCB_FLAGS(%r8) - jnz store_seg -done_store_seg: - testl $PCB_DBREGS,PCB_FLAGS(%r8) jnz store_dr /* static predict not taken */ done_store_dr: @@ -192,36 +168,47 @@ sw1: testl $TDP_KTHREAD,TD_PFLAGS(%rsi) jnz do_kthread - testl $PCB_32BIT,PCB_FLAGS(%r8) - jnz load_seg -done_load_seg: - - cmpq PCB_FSBASE(%r8),%r9 - jz 1f - /* Restore userland %fs */ -restore_fsbase: - movl $MSR_FSBASE,%ecx + /* + * Load ldt register + */ + movq TD_PROC(%rsi),%rcx + cmpq $0, P_MD+MD_LDT(%rcx) + jne do_ldt + xorl %eax,%eax +ld_ldt: lldt %ax + + /* Restore fs base in GDT */ movl PCB_FSBASE(%r8),%eax - movl PCB_FSBASE+4(%r8),%edx - wrmsr -1: - cmpq PCB_GSBASE(%r8),%r10 - jz 2f - /* Restore userland %gs */ - movl $MSR_KGSBASE,%ecx + movq PCPU(FS32P),%rdx + movw %ax,2(%rdx) + shrl $16,%eax + movb %al,4(%rdx) + shrl $8,%eax + movb %al,7(%rdx) + + /* Restore gs base in GDT */ movl PCB_GSBASE(%r8),%eax - movl PCB_GSBASE+4(%r8),%edx - wrmsr -2: + movq PCPU(GS32P),%rdx + movw %ax,2(%rdx) + shrl $16,%eax + movb %al,4(%rdx) + shrl $8,%eax + movb %al,7(%rdx) -do_tss: +do_kthread: + /* Do we need to reload tss ? */ + movq PCPU(TSSP),%rax + movq PCB_TSSP(%r8),%rdx + testq %rdx,%rdx + cmovzq PCPU(COMMONTSSP),%rdx + cmpq %rax,%rdx + jne do_tss +done_tss: + movq %r8,PCPU(RSP0) + movq %r8,PCPU(CURPCB) /* Update the TSS_RSP0 pointer for the next interrupt */ - movq PCPU(TSSP), %rax - movq %r8, PCPU(RSP0) - movq %r8, PCPU(CURPCB) - addq $COMMON_TSS_RSP0, %rax - movq %rsi, PCPU(CURTHREAD) /* into next thread */ - movq %r8, (%rax) + movq %r8,COMMON_TSS_RSP0(%rdx) + movq %rsi,PCPU(CURTHREAD) /* into next thread */ /* Test if debug registers should be restored. */ testl $PCB_DBREGS,PCB_FLAGS(%r8) @@ -250,45 +237,6 @@ done_load_dr: * We use jumps rather than call in order to avoid the stack. */ -do_kthread: - /* - * Copy old fs/gsbase to new kthread pcb for future switches - * This maintains curpcb->pcb_[fg]sbase as caches of the MSR - */ - movq %r9,PCB_FSBASE(%r8) - movq %r10,PCB_GSBASE(%r8) - jmp do_tss - -store_seg: - mov %gs,PCB_GS(%r8) - testl $PCB_GS32BIT,PCB_FLAGS(%r8) - jnz 2f -1: mov %ds,PCB_DS(%r8) - mov %es,PCB_ES(%r8) - mov %fs,PCB_FS(%r8) - jmp done_store_seg -2: movq PCPU(GS32P),%rax - movq (%rax),%rax - movq %rax,PCB_GS32SD(%r8) - jmp 1b - -load_seg: - testl $PCB_GS32BIT,PCB_FLAGS(%r8) - jnz 2f -1: movl $MSR_GSBASE,%ecx - rdmsr - mov PCB_GS(%r8),%gs - wrmsr - mov PCB_DS(%r8),%ds - mov PCB_ES(%r8),%es - mov PCB_FS(%r8),%fs - jmp restore_fsbase - /* Restore userland %gs while preserving kernel gsbase */ -2: movq PCPU(GS32P),%rax - movq PCB_GS32SD(%r8),%rcx - movq %rcx,(%rax) - jmp 1b - store_dr: movq %dr7,%rax /* yes, do the save */ movq %dr0,%r15 @@ -325,6 +273,29 @@ load_dr: movq %r11,%dr6 movq %rax,%dr7 jmp done_load_dr + +do_tss: movq %rdx,PCPU(TSSP) + movq %rdx,%rcx + movq PCPU(TSS),%rax + movw %rcx,2(%rax) + shrq $16,%rcx + movb %cl,4(%rax) + shrq $8,%rcx + movb %cl,7(%rax) + shrq $8,%rcx + movl %ecx,8(%rax) + movb $0x89,5(%rax) /* unset busy */ + movl $TSSSEL,%eax + ltr %ax + jmp done_tss + +do_ldt: movq PCPU(LDT),%rax + movq P_MD+MD_LDT_SD(%rcx),%rdx + movq %rdx,(%rax) + movq P_MD+MD_LDT_SD+8(%rcx),%rdx + movq %rdx,8(%rax) + movl $LDTSEL,%eax + jmp ld_ldt END(cpu_switch) /* @@ -398,12 +369,6 @@ ENTRY(savectx2) movq (%rsp),%rax movq %rax,PCB_RIP(%r8) - mov %ds,PCB_DS(%r8) - mov %es,PCB_ES(%r8) - mov %ss,XPCB_SS(%r8) - mov %fs,PCB_FS(%r8) - mov %gs,PCB_GS(%r8) - movq %rbx,PCB_RBX(%r8) movq %rsp,PCB_RSP(%r8) movq %rbp,PCB_RBP(%r8) Modified: head/sys/amd64/amd64/db_interface.c ============================================================================== --- head/sys/amd64/amd64/db_interface.c Wed Apr 1 12:53:01 2009 (r190619) +++ head/sys/amd64/amd64/db_interface.c Wed Apr 1 13:09:26 2009 (r190620) @@ -139,7 +139,11 @@ void db_show_mdpcpu(struct pcpu *pc) { -#if 0 - db_printf("currentldt = 0x%x\n", pc->pc_currentldt); -#endif + db_printf("curpmap = %p\n", pc->pc_curpmap); + db_printf("tssp = %p\n", pc->pc_tssp); + db_printf("commontssp = %p\n", pc->pc_commontssp); + db_printf("rsp0 = 0x%lx\n", pc->pc_rsp0); + db_printf("gs32p = %p\n", pc->pc_gs32p); + db_printf("ldt = %p\n", pc->pc_ldt); + db_printf("tss = %p\n", pc->pc_tss); } Modified: head/sys/amd64/amd64/db_trace.c ============================================================================== --- head/sys/amd64/amd64/db_trace.c Wed Apr 1 12:53:01 2009 (r190619) +++ head/sys/amd64/amd64/db_trace.c Wed Apr 1 13:09:26 2009 (r190620) @@ -69,12 +69,10 @@ static db_varfcn_t db_ss; #define DB_OFFSET(x) (db_expr_t *)offsetof(struct trapframe, x) struct db_variable db_regs[] = { { "cs", DB_OFFSET(tf_cs), db_frame }, -#if 0 { "ds", DB_OFFSET(tf_ds), db_frame }, { "es", DB_OFFSET(tf_es), db_frame }, { "fs", DB_OFFSET(tf_fs), db_frame }, { "gs", DB_OFFSET(tf_gs), db_frame }, -#endif { "ss", NULL, db_ss }, { "rax", DB_OFFSET(tf_rax), db_frame }, { "rcx", DB_OFFSET(tf_rcx), db_frame }, @@ -94,7 +92,7 @@ struct db_variable db_regs[] = { { "r15", DB_OFFSET(tf_r15), db_frame }, { "rip", DB_OFFSET(tf_rip), db_frame }, { "rflags", DB_OFFSET(tf_rflags), db_frame }, -#define DB_N_SHOW_REGS 20 /* Don't show registers after here. */ +#define DB_N_SHOW_REGS 24 /* Don't show registers after here. */ { "dr0", NULL, db_dr0 }, { "dr1", NULL, db_dr1 }, { "dr2", NULL, db_dr2 }, @@ -357,7 +355,7 @@ db_nextframe(struct amd64_frame **fp, db rbp = tf->tf_rbp; switch (frame_type) { case TRAP: - db_printf("--- trap %#lr", tf->tf_trapno); + db_printf("--- trap %#r", tf->tf_trapno); break; case SYSCALL: db_printf("--- syscall"); Modified: head/sys/amd64/amd64/exception.S ============================================================================== --- head/sys/amd64/amd64/exception.S Wed Apr 1 12:53:01 2009 (r190619) +++ head/sys/amd64/amd64/exception.S Wed Apr 1 13:09:26 2009 (r190620) @@ -42,6 +42,7 @@ #include #include #include +#include #include "assym.s" @@ -99,7 +100,7 @@ MCOUNT_LABEL(btrap) /* Traps that we leave interrupts disabled for.. */ #define TRAP_NOEN(a) \ subq $TF_RIP,%rsp; \ - movq $(a),TF_TRAPNO(%rsp) ; \ + movl $(a),TF_TRAPNO(%rsp) ; \ movq $0,TF_ADDR(%rsp) ; \ movq $0,TF_ERR(%rsp) ; \ jmp alltraps_noen @@ -111,7 +112,7 @@ IDTVEC(bpt) /* Regular traps; The cpu does not supply tf_err for these. */ #define TRAP(a) \ subq $TF_RIP,%rsp; \ - movq $(a),TF_TRAPNO(%rsp) ; \ + movl $(a),TF_TRAPNO(%rsp) ; \ movq $0,TF_ADDR(%rsp) ; \ movq $0,TF_ERR(%rsp) ; \ jmp alltraps @@ -139,7 +140,7 @@ IDTVEC(xmm) /* This group of traps have tf_err already pushed by the cpu */ #define TRAP_ERR(a) \ subq $TF_ERR,%rsp; \ - movq $(a),TF_TRAPNO(%rsp) ; \ + movl $(a),TF_TRAPNO(%rsp) ; \ movq $0,TF_ADDR(%rsp) ; \ jmp alltraps IDTVEC(tss) @@ -164,6 +165,10 @@ alltraps: testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz alltraps_testi /* already running with kernel GS.base */ swapgs + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) alltraps_testi: testl $PSL_I,TF_RFLAGS(%rsp) jz alltraps_pushregs @@ -185,6 +190,7 @@ alltraps_pushregs_no_rdi: movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) #ifdef KDTRACE_HOOKS /* @@ -193,7 +199,7 @@ alltraps_pushregs_no_rdi: * interrupt. For all other trap types, just handle them in * the usual way. */ - cmpq $T_BPTFLT,TF_TRAPNO(%rsp) + cmpl $T_BPTFLT,TF_TRAPNO(%rsp) jne calltrap /* Check if there is no DTrace hook registered. */ @@ -228,13 +234,17 @@ calltrap: .type alltraps_noen,@function alltraps_noen: testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ - jz alltraps_pushregs /* already running with kernel GS.base */ + jz 1f /* already running with kernel GS.base */ swapgs +1: movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) jmp alltraps_pushregs IDTVEC(dblfault) subq $TF_ERR,%rsp - movq $T_DOUBLEFLT,TF_TRAPNO(%rsp) + movl $T_DOUBLEFLT,TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) @@ -252,6 +262,11 @@ IDTVEC(dblfault) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 1f /* already running with kernel GS.base */ swapgs @@ -262,7 +277,7 @@ IDTVEC(dblfault) IDTVEC(page) subq $TF_ERR,%rsp - movq $T_PAGEFLT,TF_TRAPNO(%rsp) + movl $T_PAGEFLT,TF_TRAPNO(%rsp) testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ jz 1f /* already running with kernel GS.base */ swapgs @@ -270,6 +285,10 @@ IDTVEC(page) movq %rdi,TF_RDI(%rsp) /* free up a GP register */ movq %cr2,%rdi /* preserve %cr2 before .. */ movq %rdi,TF_ADDR(%rsp) /* enabling interrupts. */ + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) jz alltraps_pushregs_no_rdi sti @@ -283,17 +302,19 @@ IDTVEC(page) */ IDTVEC(prot) subq $TF_ERR,%rsp - movq $T_PROTFLT,TF_TRAPNO(%rsp) + movl $T_PROTFLT,TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq %rdi,TF_RDI(%rsp) /* free up a GP register */ leaq doreti_iret(%rip),%rdi cmpq %rdi,TF_RIP(%rsp) - je 2f /* kernel but with user gsbase!! */ + je 1f /* kernel but with user gsbase!! */ testb $SEL_RPL_MASK,TF_CS(%rsp) /* Did we come from kernel? */ - jz 1f /* already running with kernel GS.base */ -2: - swapgs -1: + jz 2f /* already running with kernel GS.base */ +1: swapgs +2: movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) testl $PSL_I,TF_RFLAGS(%rsp) jz alltraps_pushregs_no_rdi sti @@ -316,6 +337,10 @@ IDTVEC(fast_syscall) movq %rcx,TF_RIP(%rsp) /* %rcx original value is in %r10 */ movq PCPU(SCRATCH_RSP),%r11 /* %r11 already saved */ movq %r11,TF_RSP(%rsp) /* user stack pointer */ + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) sti movq $KUDSEL,TF_SS(%rsp) movq $KUCSEL,TF_CS(%rsp) @@ -333,40 +358,11 @@ IDTVEC(fast_syscall) movq %r13,TF_R13(%rsp) /* C preserved */ movq %r14,TF_R14(%rsp) /* C preserved */ movq %r15,TF_R15(%rsp) /* C preserved */ + movl $TF_HASSEGS,TF_FLAGS(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) movq %rsp, %rdi call syscall movq PCPU(CURPCB),%rax - testq $PCB_FULLCTX,PCB_FLAGS(%rax) - jne 3f -1: /* Check for and handle AST's on return to userland */ - cli - movq PCPU(CURTHREAD),%rax - testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax) - je 2f - sti - movq %rsp, %rdi - call ast - jmp 1b -2: /* restore preserved registers */ - MEXITCOUNT - movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */ - movq TF_RSI(%rsp),%rsi /* bonus: preserve arg 2 */ - movq TF_RDX(%rsp),%rdx /* return value 2 */ - movq TF_RAX(%rsp),%rax /* return value 1 */ - movq TF_RBX(%rsp),%rbx /* C preserved */ - movq TF_RBP(%rsp),%rbp /* C preserved */ - movq TF_R12(%rsp),%r12 /* C preserved */ - movq TF_R13(%rsp),%r13 /* C preserved */ - movq TF_R14(%rsp),%r14 /* C preserved */ - movq TF_R15(%rsp),%r15 /* C preserved */ - movq TF_RFLAGS(%rsp),%r11 /* original %rflags */ - movq TF_RIP(%rsp),%rcx /* original %rip */ - movq TF_RSP(%rsp),%r9 /* user stack pointer */ - movq %r9,%rsp /* original %rsp */ - swapgs - sysretq -3: /* Requested full context restore, use doreti for that */ andq $~PCB_FULLCTX,PCB_FLAGS(%rax) MEXITCOUNT jmp doreti @@ -405,7 +401,7 @@ IDTVEC(fast_syscall32) IDTVEC(nmi) subq $TF_RIP,%rsp - movq $(T_NMI),TF_TRAPNO(%rsp) + movl $(T_NMI),TF_TRAPNO(%rsp) movq $0,TF_ADDR(%rsp) movq $0,TF_ERR(%rsp) movq %rdi,TF_RDI(%rsp) @@ -423,6 +419,11 @@ IDTVEC(nmi) movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) + movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) xorl %ebx,%ebx testb $SEL_RPL_MASK,TF_CS(%rsp) jnz nmi_fromuserspace @@ -515,9 +516,7 @@ outofnmi: nocallchain: #endif testl %ebx,%ebx - jz nmi_kernelexit - swapgs - jmp nmi_restoreregs + jnz doreti_exit nmi_kernelexit: /* * Put back the preserved MSR_GSBASE value. @@ -633,7 +632,55 @@ doreti_ast: */ doreti_exit: MEXITCOUNT - movq TF_RDI(%rsp),%rdi + movq PCPU(CURTHREAD),%r8 + movq TD_PCB(%r8),%r8 + + /* + * Do not reload segment registers for kernel. + * Since we do not reload segments registers with sane + * values on kernel entry, descriptors referenced by + * segments registers may be not valid. This is fatal + * for the usermode, but is innocent for the kernel. + */ + testb $SEL_RPL_MASK,TF_CS(%rsp) + jz ld_regs + + testl $TF_HASSEGS,TF_FLAGS(%rsp) + je set_segs + +do_segs: + /* Restore %fs and fsbase */ + movw TF_FS(%rsp),%ax + .globl ld_fs +ld_fs: movw %ax,%fs + cmpw $KUF32SEL,%ax + jne 1f + movl $MSR_FSBASE,%ecx + movl PCB_FSBASE(%r8),%eax + movl PCB_FSBASE+4(%r8),%edx + wrmsr +1: + /* Restore %gs and gsbase */ + movw TF_GS(%rsp),%si + pushfq + cli + movl $MSR_GSBASE,%ecx + rdmsr + .globl ld_gs +ld_gs: movw %si,%gs + wrmsr + popfq + cmpw $KUG32SEL,%si + jne 1f + movl $MSR_KGSBASE,%ecx + movl PCB_GSBASE(%r8),%eax + movl PCB_GSBASE+4(%r8),%edx + wrmsr +1: .globl ld_es +ld_es: movw TF_ES(%rsp),%es + .globl ld_ds +ld_ds: movw TF_DS(%rsp),%ds +ld_regs:movq TF_RDI(%rsp),%rdi movq TF_RSI(%rsp),%rsi movq TF_RDX(%rsp),%rdx movq TF_RCX(%rsp),%rcx @@ -657,6 +704,14 @@ doreti_exit: doreti_iret: iretq +set_segs: + movw $KUDSEL,%ax + movw %ax,TF_DS(%rsp) + movw %ax,TF_ES(%rsp) + movw $KUF32SEL,TF_FS(%rsp) + movw $KUG32SEL,TF_GS(%rsp) + jmp do_segs + /* * doreti_iret_fault. Alternative return code for * the case where we get a fault in the doreti_exit code @@ -671,7 +726,12 @@ doreti_iret_fault: testl $PSL_I,TF_RFLAGS(%rsp) jz 1f sti -1: movq %rdi,TF_RDI(%rsp) +1: movw %fs,TF_FS(%rsp) + movw %gs,TF_GS(%rsp) + movw %es,TF_ES(%rsp) + movw %ds,TF_DS(%rsp) + movl $TF_HASSEGS,TF_FLAGS(%rsp) + movq %rdi,TF_RDI(%rsp) movq %rsi,TF_RSI(%rsp) movq %rdx,TF_RDX(%rsp) movq %rcx,TF_RCX(%rsp) @@ -686,11 +746,48 @@ doreti_iret_fault: movq %r13,TF_R13(%rsp) movq %r14,TF_R14(%rsp) movq %r15,TF_R15(%rsp) - movq $T_PROTFLT,TF_TRAPNO(%rsp) + movl $T_PROTFLT,TF_TRAPNO(%rsp) movq $0,TF_ERR(%rsp) /* XXX should be the error code */ movq $0,TF_ADDR(%rsp) FAKE_MCOUNT(TF_RIP(%rsp)) jmp calltrap + + ALIGN_TEXT + .globl ds_load_fault +ds_load_fault: + movl $T_PROTFLT,TF_TRAPNO(%rsp) + movzwl TF_DS(%rsp),%edx + movl %edx,TF_ERR(%rsp) + movw $KUDSEL,TF_DS(%rsp) + jmp calltrap + + ALIGN_TEXT + .globl es_load_fault +es_load_fault: + movl $T_PROTFLT,TF_TRAPNO(%rsp) + movzwl TF_ES(%rsp),%edx + movl %edx,TF_ERR(%rsp) + movw $KUDSEL,TF_ES(%rsp) + jmp calltrap + + ALIGN_TEXT + .globl fs_load_fault +fs_load_fault: + movl $T_PROTFLT,TF_TRAPNO(%rsp) + movzwl TF_FS(%rsp),%edx + movl %edx,TF_ERR(%rsp) + movw $KUF32SEL,TF_FS(%rsp) + jmp calltrap + + ALIGN_TEXT + .globl gs_load_fault +gs_load_fault: + popfq + movl $T_PROTFLT,TF_TRAPNO(%rsp) + movzwl TF_GS(%rsp),%edx + movl %edx,TF_ERR(%rsp) + movw $KUG32SEL,TF_GS(%rsp) + jmp calltrap #ifdef HWPMC_HOOKS ENTRY(end_exceptions) #endif Modified: head/sys/amd64/amd64/genassym.c ============================================================================== --- head/sys/amd64/amd64/genassym.c Wed Apr 1 12:53:01 2009 (r190619) +++ head/sys/amd64/amd64/genassym.c Wed Apr 1 13:09:26 2009 (r190620) @@ -79,6 +79,10 @@ ASSYM(P_VMSPACE, offsetof(struct proc, p ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap)); ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active)); +ASSYM(P_MD, offsetof(struct proc, p_md)); +ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt)); +ASSYM(MD_LDT_SD, offsetof(struct mdproc, md_ldt_sd)); + ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); @@ -132,16 +136,13 @@ ASSYM(PCB_RBX, offsetof(struct pcb, pcb_ ASSYM(PCB_RIP, offsetof(struct pcb, pcb_rip)); ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase)); ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase)); -ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds)); -ASSYM(PCB_ES, offsetof(struct pcb, pcb_es)); -ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs)); -ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs)); ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0)); ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1)); ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2)); ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3)); ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6)); ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7)); +ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_32BIT, PCB_32BIT); ASSYM(PCB_GS32BIT, PCB_GS32BIT); @@ -193,7 +194,13 @@ ASSYM(TF_CS, offsetof(struct trapframe, ASSYM(TF_RFLAGS, offsetof(struct trapframe, tf_rflags)); ASSYM(TF_RSP, offsetof(struct trapframe, tf_rsp)); ASSYM(TF_SS, offsetof(struct trapframe, tf_ss)); +ASSYM(TF_DS, offsetof(struct trapframe, tf_ds)); +ASSYM(TF_ES, offsetof(struct trapframe, tf_es)); +ASSYM(TF_FS, offsetof(struct trapframe, tf_fs)); +ASSYM(TF_GS, offsetof(struct trapframe, tf_gs)); +ASSYM(TF_FLAGS, offsetof(struct trapframe, tf_flags)); ASSYM(TF_SIZE, sizeof(struct trapframe)); +ASSYM(TF_HASSEGS, TF_HASSEGS); ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler)); ASSYM(SIGF_UC, offsetof(struct sigframe, sf_uc)); @@ -215,7 +222,11 @@ ASSYM(PC_SCRATCH_RSP, offsetof(struct pc ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap)); ASSYM(PC_TSSP, offsetof(struct pcpu, pc_tssp)); ASSYM(PC_RSP0, offsetof(struct pcpu, pc_rsp0)); +ASSYM(PC_FS32P, offsetof(struct pcpu, pc_fs32p)); ASSYM(PC_GS32P, offsetof(struct pcpu, pc_gs32p)); +ASSYM(PC_LDT, offsetof(struct pcpu, pc_ldt)); +ASSYM(PC_COMMONTSSP, offsetof(struct pcpu, pc_commontssp)); +ASSYM(PC_TSS, offsetof(struct pcpu, pc_tss)); ASSYM(LA_VER, offsetof(struct LAPIC, version)); ASSYM(LA_TPR, offsetof(struct LAPIC, tpr)); @@ -230,6 +241,10 @@ ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL)); ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL)); ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL)); ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL)); +ASSYM(KUF32SEL, GSEL(GUFS32_SEL, SEL_UPL)); +ASSYM(KUG32SEL, GSEL(GUGS32_SEL, SEL_UPL)); +ASSYM(TSSSEL, GSEL(GPROC0_SEL, SEL_KPL)); +ASSYM(LDTSEL, GSEL(GUSERLDT_SEL, SEL_KPL)); ASSYM(SEL_RPL_MASK, SEL_RPL_MASK); ASSYM(MSR_GSBASE, MSR_GSBASE); Modified: head/sys/amd64/amd64/machdep.c ============================================================================== --- head/sys/amd64/amd64/machdep.c Wed Apr 1 12:53:01 2009 (r190619) +++ head/sys/amd64/amd64/machdep.c Wed Apr 1 13:09:26 2009 (r190620) @@ -159,7 +159,7 @@ extern vm_offset_t ksym_start, ksym_end; #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 -int _udatasel, _ucodesel, _ucode32sel; +int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; @@ -192,6 +192,8 @@ struct mtx icu_lock; struct mem_range_softc mem_range_softc; +struct mtx dt_lock; /* lock for GDT and LDT */ + static void cpu_startup(dummy) void *dummy; @@ -278,7 +280,7 @@ cpu_startup(dummy) * Send an interrupt to process. * * Stack is set up to allow sigcode stored - * at top to call routine, followed by kcall + * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user @@ -316,6 +318,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); + sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; + sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && @@ -370,6 +374,11 @@ sendsig(sig_t catcher, ksiginfo_t *ksi, regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _ufssel; + regs->tf_gs = _ugssel; + regs->tf_flags = TF_HASSEGS; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } @@ -401,9 +410,16 @@ sigreturn(td, uap) ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); - if (error != 0) + if (error != 0) { + printf("sigreturn (pid %d): copyin failed\n", p->p_pid); return (error); + } ucp = &uc; + if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { + printf("sigreturn (pid %d): mc_flags %x\n", p->p_pid, + ucp->uc_mcontext.mc_flags); + return (EINVAL); + } regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* @@ -420,7 +436,8 @@ sigreturn(td, uap) * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { - printf("sigreturn: rflags = 0x%lx\n", rflags); + printf("sigreturn (pid %d): rflags = 0x%lx\n", p->p_pid, + rflags); return (EINVAL); } @@ -431,7 +448,7 @@ sigreturn(td, uap) */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { - printf("sigreturn: cs = 0x%x\n", cs); + printf("sigreturn (pid %d): cs = 0x%x\n", p->p_pid, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; @@ -442,9 +459,13 @@ sigreturn(td, uap) } ret = set_fpcontext(td, &ucp->uc_mcontext); - if (ret != 0) + if (ret != 0) { + printf("sigreturn (pid %d): set_fpcontext\n", p->p_pid); return (ret); + } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); + td->td_pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; + td->td_pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; PROC_LOCK(p); #if defined(COMPAT_43) @@ -738,22 +759,16 @@ exec_setregs(td, entry, stack, ps_string { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; + + mtx_lock(&dt_lock); + if (td->td_proc->p_md.md_ldt != NULL) + user_ldt_free(td); + else + mtx_unlock(&dt_lock); - critical_enter(); - wrmsr(MSR_FSBASE, 0); - wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; - critical_exit(); pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT); - load_ds(_udatasel); - load_es(_udatasel); - load_fs(_udatasel); - load_gs(_udatasel); - pcb->pcb_ds = _udatasel; - pcb->pcb_es = _udatasel; - pcb->pcb_fs = _udatasel; - pcb->pcb_gs = _udatasel; pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; bzero((char *)regs, sizeof(struct trapframe)); @@ -763,6 +778,11 @@ exec_setregs(td, entry, stack, ps_string regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; + regs->tf_ds = _udatasel; + regs->tf_es = _udatasel; + regs->tf_fs = _ufssel; + regs->tf_gs = _ugssel; + regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. @@ -1380,12 +1400,12 @@ hammer_time(u_int64_t modulep, u_int64_t /* * make gdt memory segments */ - gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; - for (x = 0; x < NGDT; x++) { - if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) + if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && + x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } + gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); @@ -1403,6 +1423,10 @@ hammer_time(u_int64_t modulep, u_int64_t PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); PCPU_SET(tssp, &common_tss[0]); + PCPU_SET(commontssp, &common_tss[0]); + PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); + PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); + PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); /* @@ -1415,6 +1439,7 @@ hammer_time(u_int64_t modulep, u_int64_t */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); + mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ for (x = 0; x < NIDT; x++) @@ -1503,7 +1528,8 @@ hammer_time(u_int64_t modulep, u_int64_t common_tss[0].tss_ist2 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ - common_tss[0].tss_iobase = sizeof(struct amd64tss); + common_tss[0].tss_iobase = sizeof(struct amd64tss) + + IOPAGES * PAGE_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); @@ -1531,10 +1557,12 @@ hammer_time(u_int64_t modulep, u_int64_t _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); + _ufssel = GSEL(GUFS32_SEL, SEL_UPL); + _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); - load_fs(_udatasel); + load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; @@ -1656,6 +1684,17 @@ fill_regs(struct thread *td, struct reg regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; + if (tp->tf_flags & TF_HASSEGS) { + regs->r_ds = tp->tf_ds; + regs->r_es = tp->tf_es; + regs->r_fs = tp->tf_fs; + regs->r_gs = tp->tf_gs; + } else { + regs->r_ds = 0; + regs->r_es = 0; + regs->r_fs = 0; + regs->r_gs = 0; + } return (0); } @@ -1689,6 +1728,13 @@ set_regs(struct thread *td, struct reg * tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; + if (0) { /* XXXKIB */ + tp->tf_ds = regs->r_ds; + tp->tf_es = regs->r_es; + tp->tf_fs = regs->r_fs; + tp->tf_gs = regs->r_gs; + tp->tf_flags = TF_HASSEGS; + } td->td_pcb->pcb_flags |= PCB_FULLCTX; return (0); } @@ -1808,8 +1854,15 @@ get_mcontext(struct thread *td, mcontext mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; + mcp->mc_ds = tp->tf_ds; + mcp->mc_es = tp->tf_es; + mcp->mc_fs = tp->tf_fs; + mcp->mc_gs = tp->tf_gs; + mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); + mcp->mc_fsbase = td->td_pcb->pcb_fsbase; + mcp->mc_gsbase = td->td_pcb->pcb_gsbase; return (0); } @@ -1827,7 +1880,8 @@ set_mcontext(struct thread *td, const mc int ret; tp = td->td_frame; - if (mcp->mc_len != sizeof(*mcp)) + if (mcp->mc_len != sizeof(*mcp) || + (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); @@ -1853,6 +1907,17 @@ set_mcontext(struct thread *td, const mc tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; + tp->tf_flags = mcp->mc_flags; + if (tp->tf_flags & TF_HASSEGS) { + tp->tf_ds = mcp->mc_ds; + tp->tf_es = mcp->mc_es; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***