Date: Wed, 10 Jan 2001 21:35:52 +0900 (JST) From: NAKAMURA Kazushi <kaz@kobe1995.net> To: FreeBSD-gnats-submit@freebsd.org Cc: kaz@ns.kobe1995.net Subject: kern/24219: Pentium3 SSE patch for 4.2R Message-ID: <200101101235.f0ACZq000695@beauty.kobe1995.net>
next in thread | raw e-mail | index | archive | help
>Number: 24219 >Category: kern >Synopsis: <4.2R can't use Pentium3 SSE instructions> >Confidential: no >Severity: serious >Priority: medium >Responsible: freebsd-bugs >State: open >Quarter: >Keywords: >Date-Required: >Class: sw-bug >Submitter-Id: current-users >Arrival-Date: Wed Jan 10 04:40:01 PST 2001 >Closed-Date: >Last-Modified: >Originator: NAKAMURA Kazushi >Release: FreeBSD 4.2-RELEASE i386 >Organization: PCshop LABBIT >Environment: FreeBSD4.2R/Pentium3 >Description: FreeBSD4.2R's gcc 2.95.2 & gas 2.10.0 can compile & assemble MMX2,SSE instructions. But SSE instructions can't run on 4.2R's kernel. I don't know about 5-current, but I want to use SSE instructions on 4-stable. Please merge this patch. This patch originated from titech.ac.jp (sorry, I forgot the URL). Caution: This patch has NOT been tested on a Mendocino Celeron. >How-To-Repeat: Any code that uses SSE instructions(XMM registers). >Fix: Apply this patch, and append "option CPU_ENABLE_SSE" to the kernel configuration file. Then recompile the kernel. diff -ruN -9 sys/conf/options.i386 sys+/conf/options.i386 --- sys/conf/options.i386 Sat Sep 30 11:49:30 2000 +++ sys+/conf/options.i386 Tue Jan 9 13:44:18 2001 @@ -52,18 +52,19 @@ CPU_LOOP_EN opt_cpu.h CPU_PPRO2CELERON opt_cpu.h CPU_RSTK_EN opt_cpu.h CPU_SUSP_HLT opt_cpu.h CPU_UPGRADE_HW_CACHE opt_cpu.h CPU_WT_ALLOC opt_cpu.h CYRIX_CACHE_WORKS opt_cpu.h CYRIX_CACHE_REALLY_WORKS opt_cpu.h NO_MEMORY_HOLE opt_cpu.h +CPU_ENABLE_SSE opt_cpu.h # The CPU type affects the endian conversion functions all over the kernel. I386_CPU opt_global.h I486_CPU opt_global.h I586_CPU opt_global.h I686_CPU opt_global.h MAXCONS opt_syscons.h SC_ALT_MOUSE_IMAGE opt_syscons.h diff -ruN -9 sys/i386/conf/LINT sys+/i386/conf/LINT --- sys/i386/conf/LINT Sat Nov 18 18:22:07 2000 +++ sys+/i386/conf/LINT Tue Jan 9 13:45:36 2001 @@ -158,18 +158,20 @@ # # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space # of Cyrix 6x86 and 6x86MX CPUs by setting the NO_LOCK bit of CCR1. 
# Otherwise, the NO_LOCK bit of CCR1 is cleared. (NOTE 3) # # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables # reorder). This option should not be used if you use memory mapped # I/O device(s). # +# CPU_ENABLE_SSE enables SSE/MMX2 instructions support. +# # CPU_FASTER_5X86_FPU enables faster FPU exception handler. # # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products # for i386 machines. # # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7,respectively # (no clock delay). # diff -ruN -9 sys/i386/i386/genassym.c sys+/i386/i386/genassym.c --- sys/i386/i386/genassym.c Tue May 16 15:58:06 2000 +++ sys+/i386/i386/genassym.c Tue Jan 9 13:49:18 2001 @@ -126,20 +126,21 @@ ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext)); #ifdef SMP ASSYM(PCB_MPNEST, offsetof(struct pcb, pcb_mpnest)); #endif ASSYM(PCB_SPARE, offsetof(struct pcb, __pcb_spare)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); -ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_savefpu)); -ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct save87)); +ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save)); +ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu)); +ASSYM(PCB_SAVE87_SIZE, sizeof(struct save87)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); #ifdef SMP ASSYM(PCB_SIZE, sizeof(struct pcb)); #endif ASSYM(TF_TRAPNO, offsetof(struct trapframe, tf_trapno)); ASSYM(TF_ERR, offsetof(struct trapframe, tf_err)); ASSYM(TF_CS, offsetof(struct trapframe, tf_cs)); diff -ruN -9 sys/i386/i386/initcpu.c sys+/i386/i386/initcpu.c --- sys/i386/i386/initcpu.c Sun Oct 15 12:09:32 2000 +++ sys+/i386/i386/initcpu.c Tue Jan 9 14:04:38 2001 @@ -28,18 +28,19 @@ * * $FreeBSD: src/sys/i386/i386/initcpu.c,v 1.19.2.2 2000/10/15 03:09:32 nyan Exp $ */ #include "opt_cpu.h" #include <sys/param.h> #include <sys/kernel.h> #include <sys/systm.h> +#include <sys/sysctl.h> #include <machine/cputypes.h> #include 
<machine/md_var.h> #include <machine/specialreg.h> void initializecpu(void); #if defined(I586_CPU) && defined(CPU_WT_ALLOC) void enable_K5_wt_alloc(void); void enable_K6_wt_alloc(void); @@ -55,19 +56,27 @@ static void init_i486_on_386(void); #endif static void init_6x86(void); #endif /* I486_CPU */ #ifdef I686_CPU static void init_6x86MX(void); static void init_ppro(void); static void init_mendocino(void); +#if defined(CPU_ENABLE_SSE) +void init_sse(void); #endif +#endif /* I686_CPU */ + +int hw_instruction_sse = 0; +SYSCTL_INT(_hw, OID_AUTO, instruction_sse, CTLFLAG_RD, + &hw_instruction_sse, 0, + "SSE/MMX2 instructions available in CPU"); #ifdef I486_CPU /* * IBM Blue Lightning */ static void init_bluelightning(void) { u_long eflags; @@ -494,19 +503,28 @@ bbl_cr_ctl3 |= 5 << 1; #endif wrmsr(0x11e, bbl_cr_ctl3); } load_cr0(rcr0() & ~(CR0_CD | CR0_NW)); write_eflags(eflags); #endif /* CPU_PPRO2CELERON */ } - +#if defined(CPU_ENABLE_SSE) +void +init_sse(void) +{ + if ((cpu_feature & CPUID_XMM) && (cpu_feature & CPUID_FXSR)) { + load_cr4(rcr4() | CR4_FXSR | CR4_XMM); + cpu_fxsr = hw_instruction_sse = 1; + } +} +#endif #endif /* I686_CPU */ void initializecpu(void) { switch (cpu) { #ifdef I486_CPU case CPU_BLUE: @@ -538,18 +556,21 @@ if (strcmp(cpu_vendor, "GenuineIntel") == 0) { switch (cpu_id & 0xff0) { case 0x610: init_ppro(); break; case 0x660: init_mendocino(); break; } +#if defined(CPU_ENABLE_SSE) + init_sse(); +#endif } break; #endif default: break; } #if defined(PC98) && !defined(CPU_UPGRADE_HW_CACHE) /* diff -ruN -9 sys/i386/i386/locore.s sys+/i386/i386/locore.s --- sys/i386/i386/locore.s Fri Jul 7 09:38:46 2000 +++ sys+/i386/i386/locore.s Tue Jan 9 14:07:36 2001 @@ -90,24 +90,25 @@ ALIGN_DATA /* just to be sure */ .globl HIDENAME(tmpstk) .space 0x2000 /* space for tmpstk - temporary stack */ HIDENAME(tmpstk): .globl _boothowto,_bootdev .globl _cpu,_cpu_vendor,_cpu_id,_bootinfo - .globl _cpu_high, _cpu_feature + .globl _cpu_high, _cpu_feature, _cpu_fxsr _cpu: 
.long 0 /* are we 386, 386sx, or 486 */ _cpu_id: .long 0 /* stepping ID */ _cpu_high: .long 0 /* highest arg to CPUID */ _cpu_feature: .long 0 /* features */ +_cpu_fxsr: .long 0 /* use fxsave/fxrstor instruction */ _cpu_vendor: .space 20 /* CPU origin code */ _bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ _KERNend: .long 0 /* phys addr end of kernel (just after bss) */ physfree: .long 0 /* phys addr of next free page */ #ifdef SMP .globl _cpu0prvpage cpu0pp: .long 0 /* phys addr cpu0 private pg */ diff -ruN -9 sys/i386/i386/machdep.c sys+/i386/i386/machdep.c --- sys/i386/i386/machdep.c Fri Oct 27 18:07:22 2000 +++ sys+/i386/i386/machdep.c Tue Jan 9 14:21:51 2001 @@ -119,18 +119,22 @@ extern void dblfault_handler __P((void)); extern void printcpuinfo(void); /* XXX header file */ extern void earlysetcpuclass(void); /* same header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); static void cpu_startup __P((void *)); +#ifdef CPU_ENABLE_SSE +static void set_fpregs_xmm __P((struct save87 *, struct savexmm *)); +static void fill_fpregs_xmm __P((struct savexmm *, struct save87 *)); +#endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) static MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf"); int _udatasel, _ucodesel; u_int atdevbase; #if defined(SWTCH_OPTIM_STATS) extern int swtch_optim_stats; @@ -2076,20 +2080,20 @@ tp = p->p_md.md_regs; frame_copy = *tp; *(int *)((char *)&frame_copy + (off - min)) = data; if (!EFL_SECURE(frame_copy.tf_eflags, tp->tf_eflags) || !CS_SECURE(frame_copy.tf_cs)) return (EINVAL); *(int*)((char *)p->p_addr + off) = data; return (0); } - min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_savefpu); - if (off >= min && off <= min + sizeof(struct save87) - sizeof(int)) { + min = offsetof(struct user, u_pcb) + offsetof(struct pcb, pcb_save); + if (off >= min && off <= min + sizeof(union savefpu) - sizeof(int)) { *(int*)((char 
*)p->p_addr + off) = data; return (0); } return (EFAULT); } int fill_regs(p, regs) struct proc *p; @@ -2145,33 +2149,101 @@ tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb = &p->p_addr->u_pcb; pcb->pcb_gs = regs->r_gs; return (0); } +#ifdef CPU_ENABLE_SSE +static void +fill_fpregs_xmm(sv_xmm, sv_87) + struct savexmm *sv_xmm; + struct save87 *sv_87; +{ + register struct env87 *penv_87 = &sv_87->sv_env; + register struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + + /* FPU control/status */ + penv_87->en_cw = penv_xmm->en_cw; + penv_87->en_sw = penv_xmm->en_sw; + penv_87->en_tw = penv_xmm->en_tw; + penv_87->en_fip = penv_xmm->en_fip; + penv_87->en_fcs = penv_xmm->en_fcs; + penv_87->en_opcode = penv_xmm->en_opcode; + penv_87->en_foo = penv_xmm->en_foo; + penv_87->en_fos = penv_xmm->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; + + sv_87->sv_ex_sw = sv_xmm->sv_ex_sw; +} + +static void +set_fpregs_xmm(sv_87, sv_xmm) + struct save87 *sv_87; + struct savexmm *sv_xmm; +{ + register struct env87 *penv_87 = &sv_87->sv_env; + register struct envxmm *penv_xmm = &sv_xmm->sv_env; + int i; + +/* FPU control/status */ + penv_xmm->en_cw = penv_87->en_cw; + penv_xmm->en_sw = penv_87->en_sw; + penv_xmm->en_tw = penv_87->en_tw; + penv_xmm->en_fip = penv_87->en_fip; + penv_xmm->en_fcs = penv_87->en_fcs; + penv_xmm->en_opcode = penv_87->en_opcode; + penv_xmm->en_foo = penv_87->en_foo; + penv_xmm->en_fos = penv_87->en_fos; + + /* FPU registers */ + for (i = 0; i < 8; ++i) + sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; + + sv_xmm->sv_ex_sw = sv_87->sv_ex_sw; +} +#endif /* CPU_ENABLE_SSE */ + int fill_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { - bcopy(&p->p_addr->u_pcb.pcb_savefpu, fpregs, sizeof *fpregs); +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) { + fill_fpregs_xmm(&p->p_addr->u_pcb.pcb_save.sv_xmm, + (struct save87 *)fpregs); + return (0); + } +#endif /* 
CPU_ENABLE_SSE */ + bcopy(&p->p_addr->u_pcb.pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(p, fpregs) struct proc *p; struct fpreg *fpregs; { - bcopy(fpregs, &p->p_addr->u_pcb.pcb_savefpu, sizeof *fpregs); +#ifdef CPU_ENABLE_SSE + if (cpu_fxsr) { + set_fpregs_xmm((struct save87 *)fpregs, + &p->p_addr->u_pcb.pcb_save.sv_xmm); + return (0); + } +#endif /* CPU_ENABLE_SSE */ + bcopy(fpregs, &p->p_addr->u_pcb.pcb_save.sv_87, sizeof *fpregs); return (0); } int fill_dbregs(p, dbregs) struct proc *p; struct dbreg *dbregs; { struct pcb *pcb; diff -ruN -9 sys/i386/i386/mp_machdep.c sys+/i386/i386/mp_machdep.c --- sys/i386/i386/mp_machdep.c Sat Sep 30 11:49:32 2000 +++ sys+/i386/i386/mp_machdep.c Tue Jan 9 14:26:16 2001 @@ -229,18 +229,22 @@ #define MP_ENABLE_POST 0x14 #define MPTABLE_PASS2_POST 0x15 #define START_ALL_APS_POST 0x16 #define INSTALL_AP_TRAMP_POST 0x17 #define START_AP_POST 0x18 #define MP_ANNOUNCE_POST 0x19 +#if defined(CPU_ENABLE_SSE) +extern void init_sse(void); +#endif + /** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ int current_postcode; /** XXX FIXME: what system files declare these??? */ extern struct region_descriptor r_gdt, r_idt; int bsp_apic_ready = 0; /* flags useability of BSP apic */ int mp_ncpus; /* # of CPUs, including BSP */ @@ -2392,18 +2396,23 @@ other_cpus = all_cpus & ~(1 << cpuid); printf("SMP: AP CPU #%d Launched!\n", cpuid); /* XXX FIXME: i386 specific, and redundant: Setup the FPU. */ load_cr0((rcr0() & ~CR0_EM) | CR0_MP | CR0_NE | CR0_TS); /* set up FPU state on the AP */ npxinit(__INITIAL_NPXCW__); + + /* Setup the SSE */ +#if defined(CPU_ENABLE_SSE) + init_sse(); +#endif /* A quick check from sanity claus */ apic_id = (apic_id_to_logical[(lapic.id & 0x0f000000) >> 24]); if (cpuid != apic_id) { printf("SMP: cpuid = %d\n", cpuid); printf("SMP: apic_id = %d\n", apic_id); printf("PTD[MPPTDI] = %p\n", (void *)PTD[MPPTDI]); panic("cpuid mismatch! 
boom!!"); } diff -ruN -9 sys/i386/i386/support.s sys+/i386/i386/support.s --- sys/i386/i386/support.s Sat Sep 30 11:49:33 2000 +++ sys+/i386/i386/support.s Tue Jan 9 14:29:32 2001 @@ -948,19 +948,19 @@ src in %esi dst in %edi len in %ecx XXX changed to on stack for profiling uses %eax and %edx for tmp. storage */ /* XXX use ENTRY() to get profiling. fastmove() is actually a non-entry. */ ENTRY(fastmove) pushl %ebp movl %esp,%ebp - subl $PCB_SAVEFPU_SIZE+3*4,%esp + subl $PCB_SAVE87_SIZE+3*4,%esp movl 8(%ebp),%ecx cmpl $63,%ecx jbe fastmove_tail testl $7,%esi /* check if src addr is multiple of 8 */ jnz fastmove_tail testl $7,%edi /* check if dst addr is multiple of 8 */ @@ -987,19 +987,19 @@ */ /* tmp = curpcb->pcb_savefpu; */ movl %ecx,-12(%ebp) movl %esi,-8(%ebp) movl %edi,-4(%ebp) movl %esp,%edi movl _curpcb,%esi addl $PCB_SAVEFPU,%esi cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx + movl $PCB_SAVE87_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx movl -8(%ebp),%esi movl -4(%ebp),%edi /* stop_emulating(); */ clts /* npxproc = curproc; */ movl _curproc,%eax @@ -1064,19 +1064,19 @@ /* curpcb->pcb_savefpu = tmp; */ movl %ecx,-12(%ebp) movl %esi,-8(%ebp) movl %edi,-4(%ebp) movl _curpcb,%edi addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx + movl $PCB_SAVE87_SIZE>>2,%ecx rep movsl movl -12(%ebp),%ecx movl -8(%ebp),%esi movl -4(%ebp),%edi /* start_emulating(); */ smsw %ax orb $CR0_TS,%al @@ -1103,19 +1103,19 @@ popl %ebp ret ALIGN_TEXT fastmove_fault: movl _curpcb,%edi addl $PCB_SAVEFPU,%edi movl %esp,%esi cld - movl $PCB_SAVEFPU_SIZE>>2,%ecx + movl $PCB_SAVE87_SIZE>>2,%ecx rep movsl smsw %ax orb $CR0_TS,%al lmsw %ax movl $0,_npxproc fastmove_tail_fault: diff -ruN -9 sys/i386/i386/vm_machdep.c sys+/i386/i386/vm_machdep.c --- sys/i386/i386/vm_machdep.c Sat Aug 26 13:19:26 2000 +++ sys+/i386/i386/vm_machdep.c Tue Jan 9 14:30:56 2001 @@ -135,19 +135,19 @@ } } #endif return; } #if NNPX > 0 /* Ensure that p1's pcb is up to date. 
*/ if (npxproc == p1) - npxsave(&p1->p_addr->u_pcb.pcb_savefpu); + npxsave(&p1->p_addr->u_pcb.pcb_save); #endif /* Copy p1's pcb. */ p2->p_addr->u_pcb = p1->p_addr->u_pcb; pcb2 = &p2->p_addr->u_pcb; /* * Create a new fresh stack for the new process. * Copy the trap frame for the return to user mode as if from a diff -ruN -9 sys/i386/include/asnames.h sys+/i386/include/asnames.h --- sys/i386/include/asnames.h Tue May 16 15:58:10 2000 +++ sys+/i386/include/asnames.h Tue Jan 9 14:32:12 2001 @@ -185,18 +185,19 @@ #define _copyin_vector copyin_vector #define _copyout_vector copyout_vector #define _cpl cpl #define _cpl_lock cpl_lock #define _cpu cpu #define _cpu0prvpage cpu0prvpage #define _cpu_apic_versions cpu_apic_versions #define _cpu_class cpu_class #define _cpu_feature cpu_feature +#define _cpu_fxsr cpu_fxsr #define _cpu_high cpu_high #define _cpu_id cpu_id #define _cpu_num_to_apic_id cpu_num_to_apic_id #define _cpu_switch cpu_switch #define _cpu_vendor cpu_vendor #define _default_halt default_halt #define _denormal_operand denormal_operand #define _div_small div_small #define _divide_by_zero divide_by_zero diff -ruN -9 sys/i386/include/md_var.h sys+/i386/include/md_var.h --- sys/i386/include/md_var.h Mon Feb 21 05:51:23 2000 +++ sys+/i386/include/md_var.h Tue Jan 9 14:33:24 2001 @@ -41,18 +41,19 @@ extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); extern int busdma_swi_pending; extern int (*copyin_vector) __P((const void *udaddr, void *kaddr, size_t len)); extern int (*copyout_vector) __P((const void *kaddr, void *udaddr, size_t len)); extern u_int cpu_feature; extern u_int cpu_high; extern u_int cpu_id; +extern u_int cpu_fxsr; extern char cpu_vendor[]; extern u_int cyrix_did; extern char kstack[]; #ifdef PC98 extern int need_pre_dma_flush; extern int need_post_dma_flush; #endif extern void (*netisrs[32]) __P((void)); extern int nfs_diskless_valid; diff -ruN -9 sys/i386/include/npx.h sys+/i386/include/npx.h --- sys/i386/include/npx.h Sat 
Mar 11 02:56:33 2000 +++ sys+/i386/include/npx.h Tue Jan 9 14:40:14 2001 @@ -81,18 +81,54 @@ * struct and arrange to store into this struct (ending here) * before it is inspected for ptracing or for core dumps. Some * emulators overwrite the whole struct. We have no good way of * knowing how much padding to leave. Leave just enough for the * GPL emulator's i387_union (176 bytes total). */ u_char sv_pad[64]; /* padding; used by emulators */ }; +struct envxmm { + u_int16_t en_cw; /* control word (16bits) */ + u_int16_t en_sw; /* status word (16bits) */ + u_int16_t en_tw; /* tag word (16bits) */ + u_int16_t en_opcode; /* opcode last executed (11 bits ) */ + u_int32_t en_fip; /* floating point instruction pointer */ + u_int16_t en_fcs; /* floating code segment selector */ + u_int16_t en_pad0; /* padding */ + u_int32_t en_foo; /* floating operand offset */ + u_int16_t en_fos; /* floating operand segment selector */ + u_int16_t en_pad1; /* padding */ + u_int32_t en_mxcsr; /* SSE sontorol/status register */ + u_int32_t en_pad2; /* padding */ +}; + +/* Contents of each SSE extended accumulator */ +struct xmmacc { + u_char xmm_bytes[16]; +}; + +struct savexmm { + struct envxmm sv_env; + struct { + struct fpacc87 fp_acc; + u_char fp_pad[6]; /* padding */ + } sv_fp[8]; + struct xmmacc sv_xmm[8]; + u_long sv_ex_sw; /* status word for last exception */ + u_char sv_pad[220]; +}; + +union savefpu { + struct save87 sv_87; + struct savexmm sv_xmm; +}; + /* * The hardware default control word for i387's and later coprocessors is * 0x37F, giving: * * round to nearest * 64-bit precision * all exceptions masked. 
* * We modify the affine mode bit and precision bits in this to give: @@ -108,13 +144,13 @@ #ifdef _KERNEL #ifndef npxproc extern struct proc *npxproc; #endif int npxdna __P((void)); void npxexit __P((struct proc *p)); void npxinit __P((int control)); -void npxsave __P((struct save87 *addr)); +void npxsave __P((union savefpu *addr)); #endif #endif /* !_MACHINE_NPX_H_ */ diff -ruN -9 sys/i386/include/pcb.h sys+/i386/include/pcb.h --- sys/i386/include/pcb.h Wed Dec 29 13:33:03 1999 +++ sys+/i386/include/pcb.h Tue Jan 9 14:43:07 2001 @@ -61,19 +61,20 @@ int pcb_dr3; int pcb_dr6; int pcb_dr7; #ifdef USER_LDT struct pcb_ldt *pcb_ldt; /* per process (user) LDT */ #else struct pcb_ldt *pcb_ldt_dontuse; #endif - struct save87 pcb_savefpu; /* floating point state for 287/387 */ + union savefpu pcb_save; +#define pcb_savefpu pcb_save.sv_87 u_char pcb_flags; #define FP_SOFTFP 0x01 /* process using software fltng pnt emulator */ #define PCB_DBREGS 0x02 /* process using debug registers */ caddr_t pcb_onfault; /* copyin/out fault recovery */ #ifdef SMP u_long pcb_mpnest; #else u_long pcb_mpnest_dontuse; #endif diff -ruN -9 sys/i386/include/specialreg.h sys+/i386/include/specialreg.h --- sys/i386/include/specialreg.h Sat Sep 11 00:51:44 1999 +++ sys+/i386/include/specialreg.h Tue Jan 9 14:44:25 2001 @@ -87,18 +87,20 @@ #define CPUID_MCE 0x0080 #define CPUID_CX8 0x0100 #define CPUID_APIC 0x0200 #define CPUID_B10 0x0400 #define CPUID_B11 0x0800 #define CPUID_MTRR 0x1000 #define CPUID_PGE 0x2000 #define CPUID_MCA 0x4000 #define CPUID_CMOV 0x8000 +#define CPUID_FXSR 0x01000000 +#define CPUID_XMM 0x02000000 /* * Model-specific registers for the i386 family */ #define MSR_P5_MC_ADDR 0x000 #define MSR_P5_MC_TYPE 0x001 #define MSR_TSC 0x010 #define MSR_APICBASE 0x01b #define MSR_EBL_CR_POWERON 0x02a diff -ruN -9 sys/i386/isa/npx.c sys+/i386/isa/npx.c --- sys/i386/isa/npx.c Sun Jan 30 01:17:36 2000 +++ sys+/i386/isa/npx.c Tue Jan 9 15:06:48 2001 @@ -29,18 +29,19 @@ * HOWEVER CAUSED AND ON 
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 * $FreeBSD: src/sys/i386/isa/npx.c,v 1.80 2000/01/29 16:17:36 peter Exp $ */ +#include "opt_cpu.h" #include "opt_debug_npx.h" #include "opt_math_emulate.h" #include <sys/param.h> #include <sys/systm.h> #include <sys/bus.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/module.h> @@ -90,45 +91,60 @@ #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) #define fnclex() __asm("fnclex") #define fninit() __asm("fninit") #define fnop() __asm("fnop") #define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) #define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) #define fnstsw(addr) __asm __volatile("fnstsw %0" : "=m" (*(addr))) #define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") #define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) +#define fxrstor(addr) __asm("fxrstor %0" : : "m" (*(addr))) +#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ : : "n" (CR0_TS) : "ax") #define stop_emulating() __asm("clts") #else /* not __GNUC__ */ void fldcw __P((caddr_t addr)); void fnclex __P((void)); void fninit __P((void)); void fnop __P((void)); void fnsave __P((caddr_t addr)); void fnstcw __P((caddr_t addr)); void fnstsw __P((caddr_t addr)); void fp_divide_by_0 __P((void)); void frstor __P((caddr_t addr)); +void fxsave __P((caddr_t addr)); +void fxrstor __P((caddr_t addr)); void start_emulating __P((void)); void stop_emulating __P((void)); #endif /* __GNUC__ */ +#ifdef CPU_ENABLE_SSE +#define FPU_STATUS_EX(pcb) \ + (cpu_fxsr ? 
\ + &(pcb)->pcb_save.sv_xmm.sv_ex_sw : \ + &(pcb)->pcb_save.sv_87.sv_ex_sw) +#else /* CPU_ENABLE_SSE */ +#define FPU_STATUS_EX(pcb) (&(pcb)->pcb_save.sv_87.sv_ex_sw) +#endif /* CPU_ENABLE_SSE */ + typedef u_char bool_t; static int npx_attach __P((device_t dev)); void npx_intr __P((void *)); static void npx_identify __P((driver_t *driver, device_t parent)); static int npx_probe __P((device_t dev)); static int npx_probe1 __P((device_t dev)); +static void fpusave __P((union savefpu *)); +static void fpurstor __P((union savefpu *)); #ifdef I586_CPU static long timezero __P((const char *funcname, void (*func)(void *buf, size_t len))); #endif /* I586_CPU */ int hw_float; /* XXX currently just alias for npx_exists */ SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &hw_float, 0, @@ -468,45 +484,45 @@ } /* * Initialize floating point unit. */ void npxinit(control) u_short control; { - struct save87 dummy; + union savefpu dummy; if (!npx_exists) return; /* * fninit has the same h/w bugs as fnsave. Use the detoxified * fnsave to throw away any junk in the fpu. npxsave() initializes * the fpu and sets npxproc = NULL as important side effects. */ npxsave(&dummy); stop_emulating(); fldcw(&control); if (curpcb != NULL) - fnsave(&curpcb->pcb_savefpu); + fpusave(&curpcb->pcb_save); start_emulating(); } /* * Free coprocessor (if we have it). */ void npxexit(p) struct proc *p; { if (p == npxproc) - npxsave(&curpcb->pcb_savefpu); + npxsave(&curpcb->pcb_save); #ifdef NPX_DEBUG if (npx_exists) { u_int masked_exceptions; masked_exceptions = curpcb->pcb_savefpu.sv_env.en_cw & curpcb->pcb_savefpu.sv_env.en_sw & 0x7f; /* * Log exceptions that would have trapped with the old * control word (overflow, divide by 0, and invalid operand). @@ -708,32 +724,35 @@ * solution for signals other than SIGFPE. 
*/ void npx_intr(dummy) void *dummy; { int code; u_short control; struct intrframe *frame; + u_long *pstatus; if (npxproc == NULL || !npx_exists) { printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n", npxproc, curproc, npx_exists); panic("npxintr from nowhere"); } if (npxproc != curproc) { printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n", npxproc, curproc, npx_exists); panic("npxintr from non-current process"); } + pstatus = FPU_STATUS_EX(curpcb); + outb(0xf0, 0); - fnstsw(&curpcb->pcb_savefpu.sv_ex_sw); + fnstsw(pstatus); fnstcw(&control); fnclex(); /* * Pass exception to process. */ frame = (struct intrframe *)&dummy; /* XXX */ if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) { /* @@ -747,20 +766,19 @@ * in doreti, and the frame for that could easily be set up * just before it is used). */ curproc->p_md.md_regs = INTR_TO_TRAPFRAME(frame); /* * Encode the appropriate code for detailed information on * this exception. */ code = - fpetable[(curpcb->pcb_savefpu.sv_ex_sw & ~control & 0x3f) | - (curpcb->pcb_savefpu.sv_ex_sw & 0x40)]; + fpetable[(*pstatus & ~control & 0x3f) | (*pstatus & 0x40)]; trapsignal(curproc, SIGFPE, code); } else { /* * Nested interrupt. These losers occur when: * o an IRQ13 is bogusly generated at a bogus time, e.g.: * o immediately after an fnsave or frstor of an * error state. * o a couple of 386 instructions after * "fstpl _memvar" causes a stack overflow. @@ -779,69 +797,73 @@ * Implement device not available (DNA) exception * * It would be better to switch FP context here (if curproc != npxproc) * and not necessarily for every context switch, but it is too hard to * access foreign pcb's. */ int npxdna() { + u_long *pstatus; + if (!npx_exists) return (0); if (npxproc != NULL) { printf("npxdna: npxproc = %p, curproc = %p\n", npxproc, curproc); panic("npxdna"); } stop_emulating(); /* * Record new context early in case frstor causes an IRQ13. 
*/ npxproc = curproc; - curpcb->pcb_savefpu.sv_ex_sw = 0; + + pstatus = FPU_STATUS_EX(curpcb); + *pstatus = 0; + /* * The following frstor may cause an IRQ13 when the state being * restored has a pending error. The error will appear to have been * triggered by the current (npx) user instruction even when that * instruction is a no-wait instruction that should not trigger an * error (e.g., fnclex). On at least one 486 system all of the * no-wait instructions are broken the same as frstor, so our * treatment does not amplify the breakage. On at least one * 386/Cyrix 387 system, fnclex works correctly while frstor and * fnsave are broken, so our treatment breaks fnclex if it is the * first FPU instruction after a context switch. */ - frstor(&curpcb->pcb_savefpu); + fpurstor(&curpcb->pcb_save); return (1); } /* * Wrapper for fnsave instruction to handle h/w bugs. If there is an error * pending, then fnsave generates a bogus IRQ13 on some systems. Force * any IRQ13 to be handled immediately, and then ignore it. This routine is * often called at splhigh so it must not use many system services. In * particular, it's much easier to install a special handler than to * guarantee that it's safe to use npxintr() and its supporting code. 
*/ void npxsave(addr) - struct save87 *addr; + union savefpu *addr; { -#ifdef SMP - +#if defined(SMP) || defined(CPU_ENABLE_SSE) stop_emulating(); - fnsave(addr); + fpusave(addr); /* fnop(); */ start_emulating(); npxproc = NULL; -#else /* SMP */ +#else /* SMP or CPU_ENABLE_SSE */ u_char icu1_mask; u_char icu2_mask; u_char old_icu1_mask; u_char old_icu2_mask; struct gate_descriptor save_idt_npxintr; disable_intr(); old_icu1_mask = inb(IO_ICU1 + 1); @@ -862,18 +884,50 @@ outb(IO_ICU1 + 1, (icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask)); outb(IO_ICU2 + 1, (icu2_mask & ~(npx0_imask >> 8)) | (old_icu2_mask & (npx0_imask >> 8))); idt[npx_intrno] = save_idt_npxintr; enable_intr(); /* back to usual state */ #endif /* SMP */ +} + +static void +fpusave(addr) + union savefpu *addr; +{ +#ifdef CPU_ENABLE_SSE + static struct savexmm svxmm __attribute__((aligned(16))); + + if (cpu_fxsr) { + fxsave(&svxmm); + bcopy(&svxmm, addr, sizeof(struct savexmm)); + return; + } +#endif /* CPU_ENABLE_SSE */ + fnsave(addr); +} + +static void +fpurstor(addr) + union savefpu *addr; +{ +#ifdef CPU_ENABLE_SSE + static struct savexmm svxmm __attribute__((aligned(16))); + + if (cpu_fxsr) { + bcopy(addr, &svxmm, sizeof (struct savexmm)); + fxrstor(&svxmm); + return; + } +#endif /* CPU_ENABLE_SSE */ + frstor(addr); } #ifdef I586_CPU static long timezero(funcname, func) const char *funcname; void (*func) __P((void *buf, size_t len)); { >Release-Note: >Audit-Trail: >Unformatted: To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-bugs" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200101101235.f0ACZq000695>