Date: Sat, 5 Jun 2010 16:00:00 +0000 (UTC) From: Konstantin Belousov <kib@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r208833 - in head/sys: amd64/acpica amd64/amd64 amd64/ia32 amd64/include i386/i386 i386/include i386/isa i386/linux pc98/pc98 Message-ID: <201006051600.o55G00Gr068861@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib Date: Sat Jun 5 15:59:59 2010 New Revision: 208833 URL: http://svn.freebsd.org/changeset/base/208833 Log: Introduce the x86 kernel interfaces to allow kernel code to use FPU/SSE hardware. Caller should provide a save area that is chained into the stack of the areas; pcb save_area for usermode FPU state is on top. The pcb now contains a pointer to the current FPU saved area, used during FPUDNA handling and context switches. There is also a facility to allow the kernel thread to use pcb save_area. Change the dreaded warnings "npxdna in kernel mode!" into the panics when FPU usage is not registered. KPI discussed with: fabient Tested by: pho, fabient Hardware provided by: Sentex Communications MFC after: 1 month Modified: head/sys/amd64/acpica/acpi_wakeup.c head/sys/amd64/amd64/cpu_switch.S head/sys/amd64/amd64/fpu.c head/sys/amd64/amd64/machdep.c head/sys/amd64/amd64/mp_machdep.c head/sys/amd64/amd64/trap.c head/sys/amd64/amd64/vm_machdep.c head/sys/amd64/ia32/ia32_reg.c head/sys/amd64/include/fpu.h head/sys/amd64/include/pcb.h head/sys/i386/i386/machdep.c head/sys/i386/i386/ptrace_machdep.c head/sys/i386/i386/swtch.s head/sys/i386/i386/trap.c head/sys/i386/i386/vm_machdep.c head/sys/i386/include/npx.h head/sys/i386/include/pcb.h head/sys/i386/isa/npx.c head/sys/i386/linux/linux_ptrace.c head/sys/pc98/pc98/machdep.c Modified: head/sys/amd64/acpica/acpi_wakeup.c ============================================================================== --- head/sys/amd64/acpica/acpi_wakeup.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/acpica/acpi_wakeup.c Sat Jun 5 15:59:59 2010 (r208833) @@ -245,7 +245,7 @@ acpi_sleep_machdep(struct acpi_softc *sc cr3 = rcr3(); load_cr3(KPML4phys); - stopfpu = &stopxpcbs[0]->xpcb_pcb.pcb_save; + stopfpu = stopxpcbs[0]->xpcb_pcb.pcb_save; if (acpi_savecpu(stopxpcbs[0])) { fpugetregs(curthread, stopfpu); Modified: head/sys/amd64/amd64/cpu_switch.S ============================================================================== --- head/sys/amd64/amd64/cpu_switch.S Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/amd64/cpu_switch.S Sat Jun 5 15:59:59 2010 (r208833) @@ -116,7 +116,7 @@ done_store_dr: /* have we used fp, and need a save? */ cmpq %rdi,PCPU(FPCURTHREAD) jne 1f - addq $PCB_SAVEFPU,%r8 + movq PCB_SAVEFPU(%r8),%r8 clts fxsave (%r8) smsw %ax @@ -341,7 +341,7 @@ ENTRY(savectx) je 1f movq TD_PCB(%rax),%rdi - leaq PCB_SAVEFPU(%rdi),%rdi + movq PCB_SAVEFPU(%rdi),%rdi clts fxsave (%rdi) smsw %ax @@ -349,7 +349,7 @@ ENTRY(savectx) lmsw %ax movq $PCB_SAVEFPU_SIZE,%rdx /* arg 3 */ - leaq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ + movq PCB_SAVEFPU(%rcx),%rsi /* arg 2 */ /* arg 1 (%rdi) already loaded */ call bcopy 1: Modified: head/sys/amd64/amd64/fpu.c ============================================================================== --- head/sys/amd64/amd64/fpu.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/amd64/fpu.c Sat Jun 5 15:59:59 2010 (r208833) @@ -91,8 +91,8 @@ void stop_emulating(void); #endif /* __GNUCLIKE_ASM && !lint */ -#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_cw) -#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save.sv_env.en_sw) +#define GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw) +#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw) typedef u_char bool_t; @@ -146,7 +146,7 @@ fpuexit(struct thread *td) savecrit = intr_disable(); if (curthread == PCPU_GET(fpcurthread)) { stop_emulating(); - fxsave(&PCPU_GET(curpcb)->pcb_save); + fxsave(PCPU_GET(curpcb)->pcb_save); start_emulating(); PCPU_SET(fpcurthread, 0); } @@ -424,8 +424,10 @@ fpudna(void) if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(&pcb->pcb_initial_fpucw); pcb->pcb_flags |= PCB_FPUINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_USERFPUINITDONE; } else - fxrstor(&pcb->pcb_save); + fxrstor(pcb->pcb_save); intr_restore(s); } @@ -449,13 +451,39 @@ fpudrop() * It returns the FPU ownership status. */ int +fpugetuserregs(struct thread *td, struct savefpu *addr) +{ + register_t s; + struct pcb *pcb; + + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { + bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); + addr->sv_env.en_cw = pcb->pcb_initial_fpucw; + return (_MC_FPOWNED_NONE); + } + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fxsave(addr); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +int fpugetregs(struct thread *td, struct savefpu *addr) { register_t s; + struct pcb *pcb; - if ((td->td_pcb->pcb_flags & PCB_FPUINITDONE) == 0) { + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { bcopy(&fpu_initialstate, addr, sizeof(fpu_initialstate)); - addr->sv_env.en_cw = td->td_pcb->pcb_initial_fpucw; + addr->sv_env.en_cw = pcb->pcb_initial_fpucw; return (_MC_FPOWNED_NONE); } s = intr_disable(); @@ -465,7 +493,7 @@ fpugetregs(struct thread *td, struct sav return (_MC_FPOWNED_FPU); } else { intr_restore(s); - bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + bcopy(pcb->pcb_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } @@ -474,19 +502,44 @@ fpugetregs(struct thread *td, struct sav * Set the state of the FPU. */ void +fpusetuserregs(struct thread *td, struct savefpu *addr) +{ + register_t s; + struct pcb *pcb; + + pcb = td->td_pcb; + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fxrstor(addr); + intr_restore(s); + pcb->pcb_flags |= PCB_FPUINITDONE | PCB_USERFPUINITDONE; + } else { + intr_restore(s); + bcopy(addr, &td->td_pcb->pcb_user_save, sizeof(*addr)); + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_FPUINITDONE; + pcb->pcb_flags |= PCB_USERFPUINITDONE; + } +} + +void fpusetregs(struct thread *td, struct savefpu *addr) { register_t s; + struct pcb *pcb; + pcb = td->td_pcb; s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { fxrstor(addr); intr_restore(s); } else { intr_restore(s); - bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + bcopy(addr, td->td_pcb->pcb_save, sizeof(*addr)); } - curthread->td_pcb->pcb_flags |= PCB_FPUINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_USERFPUINITDONE; + pcb->pcb_flags |= PCB_FPUINITDONE; } /* @@ -575,3 +628,74 @@ static devclass_t fpupnp_devclass; DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); #endif /* DEV_ISA */ + +int +fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) +{ + struct pcb *pcb; + + pcb = td->td_pcb; + KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, + ("mangled pcb_save")); + ctx->flags = 0; + if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) + ctx->flags |= FPU_KERN_CTX_FPUINITDONE; + fpuexit(td); + ctx->prev = pcb->pcb_save; + pcb->pcb_save = &ctx->hwstate; + pcb->pcb_flags |= PCB_KERNFPU; + pcb->pcb_flags &= ~PCB_FPUINITDONE; + return (0); +} + +int +fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) +{ + struct pcb *pcb; + register_t savecrit; + + pcb = td->td_pcb; + savecrit = intr_disable(); + if (curthread == PCPU_GET(fpcurthread)) + fpudrop(); + intr_restore(savecrit); + pcb->pcb_save = ctx->prev; + if (pcb->pcb_save == &pcb->pcb_user_save) { + if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) + pcb->pcb_flags |= PCB_FPUINITDONE; + else + pcb->pcb_flags &= ~PCB_FPUINITDONE; + pcb->pcb_flags &= ~PCB_KERNFPU; + } else { + if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) + pcb->pcb_flags |= PCB_FPUINITDONE; + else + pcb->pcb_flags &= ~PCB_FPUINITDONE; + KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); + } + return (0); +} + +int +fpu_kern_thread(u_int flags) +{ + struct pcb *pcb; + + pcb = PCPU_GET(curpcb); + KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, + ("Only kthread may use fpu_kern_thread")); + KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); + KASSERT(PCB_USER_FPU(pcb), ("recursive call")); + + pcb->pcb_flags |= PCB_KERNFPU; + return (0); +} + +int +is_fpu_kern_thread(u_int flags) +{ + + if ((curthread->td_pflags & TDP_KTHREAD) == 0) + return (0); + return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNFPU) != 0); +} Modified: head/sys/amd64/amd64/machdep.c ============================================================================== --- head/sys/amd64/amd64/machdep.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/amd64/machdep.c Sat Jun 5 15:59:59 2010 (r208833) @@ -1960,7 +1960,7 @@ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { - fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs); + fill_fpregs_xmm(&td->td_pcb->pcb_user_save, fpregs); return (0); } @@ -1969,7 +1969,7 @@ int set_fpregs(struct thread *td, struct fpreg *fpregs) { - set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save); + set_fpregs_xmm(fpregs, &td->td_pcb->pcb_user_save); return (0); } @@ -2084,7 +2084,8 @@ static void get_fpcontext(struct thread *td, mcontext_t *mcp) { - mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate); + mcp->mc_ownedfp = fpugetuserregs(td, + (struct savefpu *)&mcp->mc_fpstate); mcp->mc_fpformat = fpuformat(); } @@ -2109,7 +2110,7 @@ set_fpcontext(struct thread *td, const m */ fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; - fpusetregs(td, fpstate); + fpusetuserregs(td, fpstate); } else return (EINVAL); return (0); @@ -2120,6 +2121,7 @@ fpstate_drop(struct thread *td) { register_t s; + KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); s = intr_disable(); if (PCPU_GET(fpcurthread) == td) fpudrop(); @@ -2133,7 +2135,8 @@ fpstate_drop(struct thread *td) * sendsig() is the only caller of fpugetregs()... perhaps we just * have too many layers. */ - curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE; + curthread->td_pcb->pcb_flags &= ~(PCB_FPUINITDONE | + PCB_USERFPUINITDONE); intr_restore(s); } Modified: head/sys/amd64/amd64/mp_machdep.c ============================================================================== --- head/sys/amd64/amd64/mp_machdep.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/amd64/mp_machdep.c Sat Jun 5 15:59:59 2010 (r208833) @@ -1247,7 +1247,7 @@ cpususpend_handler(void) rf = intr_disable(); cr3 = rcr3(); - stopfpu = &stopxpcbs[cpu]->xpcb_pcb.pcb_save; + stopfpu = stopxpcbs[cpu]->xpcb_pcb.pcb_save; if (savectx2(stopxpcbs[cpu])) { fpugetregs(curthread, stopfpu); wbinvd(); Modified: head/sys/amd64/amd64/trap.c ============================================================================== --- head/sys/amd64/amd64/trap.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/amd64/trap.c Sat Jun 5 15:59:59 2010 (r208833) @@ -425,6 +425,8 @@ trap(struct trapframe *frame) case T_DNA: /* transparent fault (due to context switch "late") */ + KASSERT(PCB_USER_FPU(td->td_pcb), + ("kernel FPU ctx has leaked")); fpudna(); goto userout; @@ -449,16 +451,19 @@ trap(struct trapframe *frame) goto out; case T_DNA: + KASSERT(!PCB_USER_FPU(td->td_pcb), + ("Unregistered use of FPU in kernel")); + fpudna(); + goto out; + + case T_ARITHTRAP: /* arithmetic trap */ + case T_XMMFLT: /* SIMD floating-point exception */ + case T_FPOPFLT: /* FPU operand fetch fault */ /* - * The kernel is apparently using fpu for copying. - * XXX this should be fatal unless the kernel has - * registered such use. + * XXXKIB for now disable any FPU traps in kernel + * handler registration seems to be overkill */ - printf("fpudna in kernel mode!\n"); -#ifdef KDB - kdb_backtrace(); -#endif - fpudna(); + trap_fatal(frame, 0); goto out; case T_STKFLT: /* stack fault */ @@ -603,6 +608,8 @@ trap(struct trapframe *frame) user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); + KASSERT(PCB_USER_FPU(td->td_pcb), + ("Return from trap with kernel FPU ctx leaked")); userout: out: return; @@ -891,5 +898,12 @@ syscall(struct trapframe *frame) trapsignal(td, &ksi); } + KASSERT(PCB_USER_FPU(td->td_pcb), + ("System call %s returing with kernel FPU ctx leaked", + syscallname(td->td_proc, sa.code))); + KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save, + ("System call %s returning with mangled pcb_save", + syscallname(td->td_proc, sa.code))); + syscallret(td, error, &sa); } Modified: head/sys/amd64/amd64/vm_machdep.c ============================================================================== --- head/sys/amd64/amd64/vm_machdep.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/amd64/vm_machdep.c Sat Jun 5 15:59:59 2010 (r208833) @@ -122,7 +122,7 @@ cpu_fork(td1, p2, td2, flags) return; } - /* Ensure that p1's pcb is up to date. */ + /* Ensure that td1's pcb is up to date. */ fpuexit(td1); /* Point the pcb to the top of the stack */ @@ -130,9 +130,12 @@ cpu_fork(td1, p2, td2, flags) td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; - /* Copy p1's pcb */ + /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + /* Properly initialize pcb_save */ + pcb2->pcb_save = &pcb2->pcb_user_save; + /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); @@ -308,6 +311,7 @@ cpu_thread_alloc(struct thread *td) td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)td->td_pcb - 1; + td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save; } void @@ -381,7 +385,8 @@ cpu_set_upcall(struct thread *td, struct * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); - pcb2->pcb_flags &= ~PCB_FPUINITDONE; + pcb2->pcb_flags &= ~(PCB_FPUINITDONE | PCB_USERFPUINITDONE); + pcb2->pcb_save = &pcb2->pcb_user_save; pcb2->pcb_full_iret = 1; /* Modified: head/sys/amd64/ia32/ia32_reg.c ============================================================================== --- head/sys/amd64/ia32/ia32_reg.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/ia32/ia32_reg.c Sat Jun 5 15:59:59 2010 (r208833) @@ -147,7 +147,7 @@ fill_fpregs32(struct thread *td, struct { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_save; + struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; @@ -182,7 +182,7 @@ set_fpregs32(struct thread *td, struct f { struct save87 *sv_87 = (struct save87 *)regs; struct env87 *penv_87 = &sv_87->sv_env; - struct savefpu *sv_fpu = &td->td_pcb->pcb_save; + struct savefpu *sv_fpu = &td->td_pcb->pcb_user_save; struct envxmm *penv_xmm = &sv_fpu->sv_env; int i; Modified: head/sys/amd64/include/fpu.h ============================================================================== --- head/sys/amd64/include/fpu.h Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/include/fpu.h Sat Jun 5 15:59:59 2010 (r208833) @@ -73,6 +73,17 @@ struct savefpu { u_char sv_pad[96]; } __aligned(16); +#ifdef _KERNEL +struct fpu_kern_ctx { + struct savefpu hwstate; + struct savefpu *prev; + uint32_t flags; +}; +#define FPU_KERN_CTX_FPUINITDONE 0x01 + +#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNFPU) == 0) +#endif + /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: @@ -102,9 +113,22 @@ void fpudrop(void); void fpuexit(struct thread *td); int fpuformat(void); int fpugetregs(struct thread *td, struct savefpu *addr); +int fpugetuserregs(struct thread *td, struct savefpu *addr); void fpuinit(void); void fpusetregs(struct thread *td, struct savefpu *addr); +void fpusetuserregs(struct thread *td, struct savefpu *addr); int fputrap(void); +int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, + u_int flags); +int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_thread(u_int flags); +int is_fpu_kern_thread(u_int flags); + +/* + * Flags for fpu_kern_enter() and fpu_kern_thread(). + */ +#define FPU_KERN_NORMAL 0x0000 + #endif #endif /* !_MACHINE_FPU_H_ */ Modified: head/sys/amd64/include/pcb.h ============================================================================== --- head/sys/amd64/include/pcb.h Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/amd64/include/pcb.h Sat Jun 5 15:59:59 2010 (r208833) @@ -57,7 +57,9 @@ struct pcb { register_t pcb_gsbase; u_long pcb_flags; #define PCB_DBREGS 0x02 /* process using debug registers */ +#define PCB_KERNFPU 0x04 /* kernel uses fpu */ #define PCB_FPUINITDONE 0x08 /* fpu state is initialized */ +#define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */ #define PCB_GS32BIT 0x20 /* linux gs switch */ #define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */ #define PCB_FULLCTX 0x80 /* full context restore on sysret */ @@ -69,7 +71,7 @@ struct pcb { u_int64_t pcb_dr6; u_int64_t pcb_dr7; - struct savefpu pcb_save; + struct savefpu pcb_user_save; uint16_t pcb_initial_fpucw; caddr_t pcb_onfault; /* copyin/out fault recovery */ @@ -78,6 +80,7 @@ struct pcb { struct user_segment_descriptor pcb_gs32sd; /* local tss, with i/o bitmap; NULL for common */ struct amd64tss *pcb_tssp; + struct savefpu *pcb_save; char pcb_full_iret; }; Modified: head/sys/i386/i386/machdep.c ============================================================================== --- head/sys/i386/i386/machdep.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/i386/i386/machdep.c Sat Jun 5 15:59:59 2010 (r208833) @@ -3185,12 +3185,12 @@ fill_fpregs(struct thread *td, struct fp { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { - fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, + fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ - bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); + bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs, sizeof *fpregs); return (0); } @@ -3200,11 +3200,11 @@ set_fpregs(struct thread *td, struct fpr #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, - &td->td_pcb->pcb_save.sv_xmm); + &td->td_pcb->pcb_user_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ - bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); + bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87, sizeof *fpregs); return (0); } @@ -3331,7 +3331,7 @@ get_fpcontext(struct thread *td, mcontex addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } - mcp->mc_ownedfp = npxgetregs(td, addr); + mcp->mc_ownedfp = npxgetuserregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); @@ -3376,7 +3376,7 @@ set_fpcontext(struct thread *td, const m * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. */ - npxsetregs(td, addr); + npxsetuserregs(td, addr); #endif /* * Don't bother putting things back where they were in the @@ -3393,6 +3393,7 @@ fpstate_drop(struct thread *td) { register_t s; + KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) @@ -3408,7 +3409,8 @@ fpstate_drop(struct thread *td) * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ - curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; + curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | + PCB_NPXUSERINITDONE); intr_restore(s); } Modified: head/sys/i386/i386/ptrace_machdep.c ============================================================================== --- head/sys/i386/i386/ptrace_machdep.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/i386/i386/ptrace_machdep.c Sat Jun 5 15:59:59 2010 (r208833) @@ -51,7 +51,7 @@ cpu_ptrace(struct thread *td, int req, v if (!cpu_fxsr) return (EINVAL); - fpstate = &td->td_pcb->pcb_save.sv_xmm; + fpstate = &td->td_pcb->pcb_user_save.sv_xmm; switch (req) { case PT_GETXMMREGS: error = copyout(fpstate, addr, sizeof(*fpstate)); Modified: head/sys/i386/i386/swtch.s ============================================================================== --- head/sys/i386/i386/swtch.s Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/i386/i386/swtch.s Sat Jun 5 15:59:59 2010 (r208833) @@ -156,8 +156,7 @@ ENTRY(cpu_switch) /* have we used fp, and need a save? */ cmpl %ecx,PCPU(FPCURTHREAD) jne 1f - addl $PCB_SAVEFPU,%edx /* h/w bugs make saving complicated */ - pushl %edx + pushl PCB_SAVEFPU(%edx) /* h/w bugs make saving complicated */ call npxsave /* do it in a big C function */ popl %eax 1: @@ -408,7 +407,7 @@ ENTRY(savectx) pushl %ecx movl TD_PCB(%eax),%eax - leal PCB_SAVEFPU(%eax),%eax + movl PCB_SAVEFPU(%eax),%eax pushl %eax pushl %eax call npxsave @@ -417,7 +416,7 @@ ENTRY(savectx) popl %ecx pushl $PCB_SAVEFPU_SIZE - leal PCB_SAVEFPU(%ecx),%ecx + movl PCB_SAVEFPU(%ecx),%ecx pushl %ecx pushl %eax call bcopy Modified: head/sys/i386/i386/trap.c ============================================================================== --- head/sys/i386/i386/trap.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/i386/i386/trap.c Sat Jun 5 15:59:59 2010 (r208833) @@ -501,6 +501,8 @@ trap(struct trapframe *frame) case T_DNA: #ifdef DEV_NPX + KASSERT(PCB_USER_FPU(td->td_pcb), + ("kernel FPU ctx has leaked")); /* transparent fault (due to context switch "late") */ if (npxdna()) goto userout; @@ -533,20 +535,23 @@ trap(struct trapframe *frame) case T_DNA: #ifdef DEV_NPX - /* - * The kernel is apparently using npx for copying. - * XXX this should be fatal unless the kernel has - * registered such use. - */ - printf("npxdna in kernel mode!\n"); -#ifdef KDB - kdb_backtrace(); -#endif + KASSERT(!PCB_USER_FPU(td->td_pcb), + ("Unregistered use of FPU in kernel")); if (npxdna()) goto out; #endif break; + case T_ARITHTRAP: /* arithmetic trap */ + case T_XMMFLT: /* SIMD floating-point exception */ + case T_FPOPFLT: /* FPU operand fetch fault */ + /* + * XXXKIB for now disable any FPU traps in kernel + * handler registration seems to be overkill + */ + trap_fatal(frame, 0); + goto out; + /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle @@ -752,6 +757,8 @@ trap(struct trapframe *frame) user: userret(td, frame); mtx_assert(&Giant, MA_NOTOWNED); + KASSERT(PCB_USER_FPU(td->td_pcb), + ("Return from trap with kernel FPU ctx leaked")); userout: out: return; @@ -1064,5 +1071,12 @@ syscall(struct trapframe *frame) trapsignal(td, &ksi); } + KASSERT(PCB_USER_FPU(td->td_pcb), + ("System call %s returning with kernel FPU ctx leaked", + syscallname(td->td_proc, sa.code))); + KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save, + ("System call %s returning with mangled pcb_save", + syscallname(td->td_proc, sa.code))); + syscallret(td, error, &sa); } Modified: head/sys/i386/i386/vm_machdep.c ============================================================================== --- head/sys/i386/i386/vm_machdep.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/i386/i386/vm_machdep.c Sat Jun 5 15:59:59 2010 (r208833) @@ -176,13 +176,13 @@ cpu_fork(td1, p2, td2, flags) return; } - /* Ensure that p1's pcb is up to date. */ + /* Ensure that td1's pcb is up to date. */ if (td1 == curthread) td1->td_pcb->pcb_gs = rgs(); #ifdef DEV_NPX savecrit = intr_disable(); if (PCPU_GET(fpcurthread) == td1) - npxsave(&td1->td_pcb->pcb_save); + npxsave(td1->td_pcb->pcb_save); intr_restore(savecrit); #endif @@ -191,9 +191,12 @@ cpu_fork(td1, p2, td2, flags) td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; - /* Copy p1's pcb */ + /* Copy td1's pcb */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); + /* Properly initialize pcb_save */ + pcb2->pcb_save = &pcb2->pcb_user_save; + /* Point mdproc and then copy over td1's contents */ mdp2 = &p2->p_md; bcopy(&p1->p_md, mdp2, sizeof(*mdp2)); @@ -372,6 +375,7 @@ cpu_thread_alloc(struct thread *td) td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1; td->td_pcb->pcb_ext = NULL; + td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save; } void @@ -437,7 +441,8 @@ cpu_set_upcall(struct thread *td, struct * values here. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); - pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE); + pcb2->pcb_flags &= ~(PCB_NPXTRAP|PCB_NPXINITDONE|PCB_NPXUSERINITDONE); + pcb2->pcb_save = &pcb2->pcb_user_save; /* * Create a new fresh stack for the new thread. Modified: head/sys/i386/include/npx.h ============================================================================== --- head/sys/i386/include/npx.h Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/i386/include/npx.h Sat Jun 5 15:59:59 2010 (r208833) @@ -143,6 +143,15 @@ union savefpu { #define IRQ_NPX 13 +struct fpu_kern_ctx { + union savefpu hwstate; + union savefpu *prev; + uint32_t flags; +}; +#define FPU_KERN_CTX_NPXINITDONE 0x01 + +#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0) + /* full reset on some systems, NOP on others */ #define npx_full_reset() outb(IO_NPX + 1, 0) @@ -151,10 +160,22 @@ void npxdrop(void); void npxexit(struct thread *td); int npxformat(void); int npxgetregs(struct thread *td, union savefpu *addr); +int npxgetuserregs(struct thread *td, union savefpu *addr); void npxinit(void); void npxsave(union savefpu *addr); void npxsetregs(struct thread *td, union savefpu *addr); +void npxsetuserregs(struct thread *td, union savefpu *addr); int npxtrap(void); +int fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, + u_int flags); +int fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx); +int fpu_kern_thread(u_int flags); +int is_fpu_kern_thread(u_int flags); + +/* + * Flags for fpu_kern_enter() and fpu_kern_thread(). + */ +#define FPU_KERN_NORMAL 0x0000 #endif Modified: head/sys/i386/include/pcb.h ============================================================================== --- head/sys/i386/include/pcb.h Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/i386/include/pcb.h Sat Jun 5 15:59:59 2010 (r208833) @@ -60,7 +60,7 @@ struct pcb { int pcb_dr6; int pcb_dr7; - union savefpu pcb_save; + union savefpu pcb_user_save; uint16_t pcb_initial_npxcw; u_int pcb_flags; #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ @@ -68,6 +68,8 @@ struct pcb { #define PCB_NPXTRAP 0x04 /* npx trap pending */ #define PCB_NPXINITDONE 0x08 /* fpu state is initialized */ #define PCB_VM86CALL 0x10 /* in vm86 call */ +#define PCB_NPXUSERINITDONE 0x20 /* user fpu state is initialized */ +#define PCB_KERNNPX 0x40 /* kernel uses npx */ caddr_t pcb_onfault; /* copyin/out fault recovery */ int pcb_gs; @@ -76,6 +78,7 @@ struct pcb { struct pcb_ext *pcb_ext; /* optional pcb extension */ int pcb_psl; /* process status long */ u_long pcb_vm86[2]; /* vm86bios scratch space */ + union savefpu *pcb_save; }; #ifdef _KERNEL Modified: head/sys/i386/isa/npx.c ============================================================================== --- head/sys/i386/isa/npx.c Sat Jun 5 14:53:34 2010 (r208832) +++ head/sys/i386/isa/npx.c Sat Jun 5 15:59:59 2010 (r208833) @@ -135,12 +135,12 @@ void stop_emulating(void); #ifdef CPU_ENABLE_SSE #define GET_FPU_CW(thread) \ (cpu_fxsr ? \ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_cw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_cw) + (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ + (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ (cpu_fxsr ? \ - (thread)->td_pcb->pcb_save.sv_xmm.sv_env.en_sw : \ - (thread)->td_pcb->pcb_save.sv_87.sv_env.en_sw) + (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ + (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) do { \ if (cpu_fxsr) \ (savefpu)->sv_xmm.sv_env.en_cw = (value); \ @@ -149,9 +149,9 @@ void stop_emulating(void); } while (0) #else /* CPU_ENABLE_SSE */ #define GET_FPU_CW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_cw) + (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) #define GET_FPU_SW(thread) \ - (thread->td_pcb->pcb_save.sv_87.sv_env.en_sw) + (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) #define SET_FPU_CW(savefpu, value) \ (savefpu)->sv_87.sv_env.en_cw = (value) #endif /* CPU_ENABLE_SSE */ @@ -502,7 +502,7 @@ npxexit(td) savecrit = intr_disable(); if (curthread == PCPU_GET(fpcurthread)) - npxsave(&PCPU_GET(curpcb)->pcb_save); + npxsave(PCPU_GET(curpcb)->pcb_save); intr_restore(savecrit); #ifdef NPX_DEBUG if (npx_exists) { @@ -809,6 +809,8 @@ npxdna(void) if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) fldcw(&pcb->pcb_initial_npxcw); pcb->pcb_flags |= PCB_NPXINITDONE; + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } else { /* * The following fpurstor() may cause an IRQ13 when the @@ -824,7 +826,7 @@ npxdna(void) * fnclex if it is the first FPU instruction after a context * switch. */ - fpurstor(&pcb->pcb_save); + fpurstor(pcb->pcb_save); } intr_restore(s); @@ -895,18 +897,18 @@ npxdrop() * It returns the FPU ownership status. */ int -npxgetregs(td, addr) - struct thread *td; - union savefpu *addr; +npxgetregs(struct thread *td, union savefpu *addr) { + struct pcb *pcb; register_t s; if (!npx_exists) return (_MC_FPOWNED_NONE); - if ((td->td_pcb->pcb_flags & PCB_NPXINITDONE) == 0) { + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); - SET_FPU_CW(addr, td->td_pcb->pcb_initial_npxcw); + SET_FPU_CW(addr, pcb->pcb_initial_npxcw); return (_MC_FPOWNED_NONE); } s = intr_disable(); @@ -925,7 +927,43 @@ npxgetregs(td, addr) return (_MC_FPOWNED_FPU); } else { intr_restore(s); - bcopy(&td->td_pcb->pcb_save, addr, sizeof(*addr)); + bcopy(pcb->pcb_save, addr, sizeof(*addr)); + return (_MC_FPOWNED_PCB); + } +} + +int +npxgetuserregs(struct thread *td, union savefpu *addr) +{ + struct pcb *pcb; + register_t s; + + if (!npx_exists) + return (_MC_FPOWNED_NONE); + + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) == 0) { + bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); + SET_FPU_CW(addr, pcb->pcb_initial_npxcw); + return (_MC_FPOWNED_NONE); + } + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { + fpusave(addr); +#ifdef CPU_ENABLE_SSE + if (!cpu_fxsr) +#endif + /* + * fnsave initializes the FPU and destroys whatever + * context it contains. Make sure the FPU owner + * starts with a clean state next time. + */ + npxdrop(); + intr_restore(s); + return (_MC_FPOWNED_FPU); + } else { + intr_restore(s); + bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); return (_MC_FPOWNED_PCB); } } @@ -934,15 +972,15 @@ npxgetregs(td, addr) * Set the state of the FPU. */ void -npxsetregs(td, addr) - struct thread *td; - union savefpu *addr; +npxsetregs(struct thread *td, union savefpu *addr) { + struct pcb *pcb; register_t s; if (!npx_exists) return; + pcb = td->td_pcb; s = intr_disable(); if (td == PCPU_GET(fpcurthread)) { #ifdef CPU_ENABLE_SSE @@ -953,9 +991,39 @@ npxsetregs(td, addr) intr_restore(s); } else { intr_restore(s); - bcopy(addr, &td->td_pcb->pcb_save, sizeof(*addr)); + bcopy(addr, pcb->pcb_save, sizeof(*addr)); + } + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXUSERINITDONE; + pcb->pcb_flags |= PCB_NPXINITDONE; +} + +void +npxsetuserregs(struct thread *td, union savefpu *addr) +{ + struct pcb *pcb; + register_t s; + + if (!npx_exists) + return; + + pcb = td->td_pcb; + s = intr_disable(); + if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { +#ifdef CPU_ENABLE_SSE + if (!cpu_fxsr) +#endif + fnclex(); /* As in npxdrop(). */ + fpurstor(addr); + intr_restore(s); + pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; + } else { + intr_restore(s); + bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); + if (PCB_USER_FPU(pcb)) + pcb->pcb_flags |= PCB_NPXINITDONE; + pcb->pcb_flags |= PCB_NPXUSERINITDONE; } - curthread->td_pcb->pcb_flags |= PCB_NPXINITDONE; } static void @@ -1124,3 +1192,74 @@ DRIVER_MODULE(npxisa, isa, npxisa_driver DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); #endif #endif /* DEV_ISA */ + +int +fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) +{ *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201006051600.o55G00Gr068861>